In [1]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import jpholiday
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

# Set the working directory and pick the matching data root.
# The project folder is tried on drive H: first; if that path does not exist
# the same folder on drive G: is used instead.
# NOTE(review): the data root always points at the *other* drive letter
# (H: project -> G: data, G: project -> H: data). Presumably the two drives
# hold different content — confirm this crossing is intentional.
try:
    os.chdir('H:/マイドライブ/03_code_test/clustering-house_trial')
    data_path_header = 'G:/マイドライブ/01_研究/02_円山町/1_データ前処理'
except FileNotFoundError:
    os.chdir('G:/マイドライブ/03_code_test/clustering-house_trial')
    data_path_header = 'H:/マイドライブ/01_研究/02_円山町/1_データ前処理'
print("Current Working Directory: ", os.getcwd())


# Make the custom library directory importable, then load the column-name
# translation table that lives there.
sys.path.append(data_path_header)
from column_translation import column_translation_dict


Current Working Directory:  G:\マイドライブ\03_code_test\clustering-house_trial


In [13]:
# Data loading function
def load_data(house_num, start_date, end_date, col_list):
    """Load the 30-minute measurements of one house together with its state labels.

    The measurement CSV is read from below the module-level ``data_path_header``,
    its columns are renamed with the module-level ``column_translation_dict``,
    and rows are restricted to ``start_date <= time <= end_date``.  For every
    requested column the lexicographically last ``*_mode.csv`` file found under
    ``../GMM-HMM_Trial/output_HMM/<house>号地/<column>/`` is read and its ``mode``
    column is attached as ``<column>_state`` (inner join on the time index).

    Args:
        house_num: House identifier used to build the file paths.
        start_date: Inclusive lower bound for the ``time`` column.
        end_date: Inclusive upper bound for the ``time`` column.
        col_list: Names (after translation) of the measurement columns to load.
            The list is not modified.

    Returns:
        A tuple ``(df_data, available_cols)``.  ``df_data`` is indexed by
        ``time`` and holds the requested measurement columns plus one
        ``<column>_state`` column per column that has a mode file.
        ``available_cols`` is a new list with the columns for which a mode
        file was found, in their original order.
    """
    csv_path = os.path.join(
        data_path_header,
        f'12_一括処理後データ/大林新星和不動産/{house_num}号地/{house_num}_30Min.csv',
    )
    df = pd.read_csv(csv_path, encoding='utf-8')
    df = df.rename(columns=column_translation_dict)
    df["time"] = pd.to_datetime(df["time"])
    in_range = (df["time"] >= start_date) & (df["time"] <= end_date)
    # Build the indexed frame in one step instead of calling
    # set_index(inplace=True) on a slice of ``df``.
    df_data = df.loc[in_range, ['time'] + list(col_list)].set_index('time')

    # Load the state (mode) data
    available_cols = []
    df_state = None
    for column in col_list:
        # glob.glob() returns an empty list (it never raises FileNotFoundError)
        # and its order is file-system dependent, so sort before taking [-1].
        mode_files = sorted(
            glob.glob(f'../GMM-HMM_Trial/output_HMM/{house_num}号地/{column}/*_mode.csv')
        )
        if not mode_files:
            print(f'No mode file found for {column} in house {house_num}. Skipping.')
            continue
        df_mode = pd.read_csv(
            mode_files[-1],
            encoding='utf-8',
            index_col='time',
            parse_dates=['time'],
            usecols=['time', 'mode'],
        )
        df_mode = df_mode.rename(columns={'mode': f'{column}_state'})
        if df_state is None:
            df_state = df_mode
        else:
            df_state = pd.merge(df_state, df_mode, left_index=True, right_index=True)
        # Collect into a new list; removing from ``col_list`` while iterating
        # over it would skip the element that follows each removal.
        available_cols.append(column)

    if df_state is not None:
        df_data = pd.merge(df_data, df_state, left_index=True, right_index=True)
    return df_data, available_cols

# Threshold loading function
def load_thresholds(house_num, col_list):
    """Read the result CSV stored for the first column of ``col_list``.

    Looks for ``*_result.csv`` under
    ``../GMM-HMM_Trial/output_HMM/<house>号地/<col_list[0]>/`` (relative to the
    current working directory) and reads the lexicographically last match.

    Args:
        house_num: House identifier used to build the directory name.
        col_list: Column names; only the first entry is used.

    Returns:
        The contents of the selected CSV as a DataFrame.

    Raises:
        IndexError: If ``col_list`` is empty.
        FileNotFoundError: If no ``*_result.csv`` file exists for the column.
    """
    pattern = f'../GMM-HMM_Trial/output_HMM/{house_num}号地/{col_list[0]}/*_result.csv'
    # glob order depends on the file system; sort so that [-1] is deterministic.
    result_files = sorted(glob.glob(pattern))
    if not result_files:
        raise FileNotFoundError(f'No result file matches {pattern}')
    return pd.read_csv(result_files[-1], encoding='utf-8')

# 1. Mean consumption (overall and per month)
def calc_mean_consumption(df_data, column):
    """Return the overall mean and the calendar-month means of one column.

    Args:
        df_data: DataFrame with a DatetimeIndex.
        column: Name of the column to average.

    Returns:
        A tuple ``(overall_mean, monthly_mean)``: the mean over every row of
        ``df_data`` rounded to 2 decimals, and a Series of month-wise means
        (rounded to 2 decimals) indexed by monthly periods.
    """
    series = df_data[column]
    overall_mean = series.mean()
    # Use an offset object instead of the 'M' string alias: the alias has been
    # deprecated for resampling since pandas 2.2 (in favour of 'ME'), and the
    # warning would be hidden by the file-wide warnings filter.
    monthly_mean = series.resample(pd.offsets.MonthEnd()).mean()
    monthly_mean.index = monthly_mean.index.to_period('M')
    return round(overall_mean, 2), monthly_mean.round(2)

# 2. Consumption by time of day (0-6, 6-12, 12-18, 18-24)
# NOTE: a later definition with the same name in this file (3-hour bins)
# replaces this one when the whole cell is executed.
def calc_time_bin_consumption(df_data, column):
    """Return the monthly mean of ``column`` for four 6-hour time-of-day bins.

    Args:
        df_data: DataFrame with a DatetimeIndex.  It is not modified.
        column: Name of the column to average.

    Returns:
        DataFrame indexed by monthly period with one column per bin that
        occurs in the data, labelled ``'0-6'``, ``'6-12'``, ``'12-18'``,
        ``'18-24'`` in chronological order; values rounded to 2 decimals.
    """
    labels = ['0-6', '6-12', '12-18', '18-24']
    month_key = df_data.index.to_period('M')
    # Group on the integer bin number so that unstack() orders the columns
    # chronologically; grouping on the string labels sorts them as
    # '0-6', '12-18', '18-24', '6-12'.
    bin_key = pd.Index(df_data.index.hour // 6, name='time_bin')
    time_bin_monthly = df_data.groupby([month_key, bin_key])[column].mean().unstack()
    time_bin_monthly.columns = pd.Index(
        [labels[int(b)] for b in time_bin_monthly.columns], name='time_bin'
    )
    return time_bin_monthly.round(2)

# --- 2. Consumption by time of day (3-hour bins, monthly mean) ---
def calc_time_bin_consumption(df_data, column):
    """Return the monthly mean of ``column`` for each 3-hour time-of-day bin.

    Args:
        df_data: DataFrame with a DatetimeIndex.  It is not modified.
        column: Name of the column to average.

    Returns:
        DataFrame indexed by monthly period with one column per 3-hour bin
        that occurs in the data, labelled ``'0-3'``, ``'3-6'``, ...,
        ``'21-24'``; values rounded to 2 decimals.
    """
    month_key = df_data.index.to_period('M')
    # Start hour of the 3-hour bin (0, 3, 6, ..., 21).  Kept as a standalone
    # grouping key so that no helper column is written into the caller's frame.
    hour_bin = pd.Index((df_data.index.hour // 3) * 3, name='hour_bin')

    # Mean per (month, bin), then one column per bin
    monthly_time_bin = df_data.groupby([month_key, hour_bin])[column].mean().unstack(level=1)

    # Human-readable column labels
    monthly_time_bin.columns = [f'{h}-{h+3}' for h in monthly_time_bin.columns]

    return monthly_time_bin.round(2)


# 3. PCR (peak consumption ratio)
def calc_pcr(df_data, column):
    """Return the monthly mean of the daily peak-to-mean ratio of ``column``.

    For every calendar day the ratio ``max / mean`` is computed; infinite
    ratios (daily mean of zero) are treated as missing.  The daily ratios are
    then averaged per calendar month.

    Args:
        df_data: DataFrame with a DatetimeIndex.
        column: Name of the column to analyse.

    Returns:
        Series indexed by monthly period, rounded to 2 decimals.
    """
    daily = df_data[column].resample('D')
    pcr_daily = daily.max() / daily.mean()
    pcr_daily = pcr_daily.replace([np.inf, -np.inf], np.nan)
    # Use an offset object instead of the 'M' string alias: the alias has been
    # deprecated for resampling since pandas 2.2 (in favour of 'ME').
    pcr_monthly = pcr_daily.resample(pd.offsets.MonthEnd()).mean()
    pcr_monthly.index = pcr_monthly.index.to_period('M')
    return pcr_monthly.round(2)

# 4. Day / night consumption ratio
def calc_day_night_ratio(df_data, column):
    """Return the monthly ratio of daytime to night-time consumption.

    Samples whose hour satisfies ``6 <= hour < 18`` count as day, all others
    as night.  For every calendar month the sum of ``column`` over the day
    samples is divided by the sum over the night samples.

    Args:
        df_data: DataFrame with a DatetimeIndex.  It is not modified.
        column: Name of the column to analyse.

    Returns:
        Series named ``'day_night_ratio'`` indexed by monthly period, rounded
        to 2 decimals.  A month lacking day or night samples yields NaN.
    """
    hours = df_data.index.hour
    is_day = (hours >= 6) & (hours < 18)
    # Standalone grouping key: no helper column is written into the caller's frame.
    day_night = pd.Index(np.where(is_day, 'day', 'night'), name='day_night')
    month_key = df_data.index.to_period('M')
    monthly_sum = df_data.groupby([month_key, day_night])[column].sum().unstack()
    # Guarantee both columns exist so data containing only day (or only night)
    # samples gives NaN instead of a KeyError.
    monthly_sum = monthly_sum.reindex(columns=['day', 'night'])
    ratio = monthly_sum['day'] / monthly_sum['night']
    return ratio.rename('day_night_ratio').round(2)

# --- 5. Weekday / weekend consumption ratio (public holidays count as weekend) ---
def calc_weekday_weekend_ratio(df_data, column):
    """Return the monthly ratio of weekday to weekend/holiday consumption.

    A sample counts as ``'weekend'`` when it falls on a Saturday or Sunday or
    when ``jpholiday.is_holiday`` reports its date as a holiday; otherwise it
    counts as ``'weekday'``.  For every calendar month the sum of ``column``
    over weekday samples is divided by the sum over weekend samples.

    Args:
        df_data: DataFrame with a DatetimeIndex.  It is not modified.
        column: Name of the column to analyse.

    Returns:
        Series named ``'weekday_weekend_ratio'`` indexed by monthly period,
        rounded to 2 decimals.  A month lacking either day type yields NaN.
    """
    index = df_data.index
    # Look each calendar date up once instead of once per sample.
    days = index.normalize()
    holiday_by_day = {day: bool(jpholiday.is_holiday(day)) for day in days.unique()}
    is_holiday = np.array([holiday_by_day[day] for day in days], dtype=bool)

    is_day_off = (index.weekday >= 5) | is_holiday
    # Standalone grouping key: no helper columns are written into the caller's frame.
    day_type = pd.Index(np.where(is_day_off, 'weekend', 'weekday'), name='day_type')

    monthly_sum = df_data.groupby([index.to_period('M'), day_type])[column].sum().unstack()
    # Guarantee both columns exist so a missing day type gives NaN, not a KeyError.
    monthly_sum = monthly_sum.reindex(columns=['weekday', 'weekend'])
    ratio = monthly_sum['weekday'] / monthly_sum['weekend']
    return ratio.rename('weekday_weekend_ratio').round(2)

# --- 6. Consumption per active hour ---
# NOTE: a later definition with the same name in this file (which also returns
# the per-time-bin active probability) replaces this one when the whole cell
# is executed.
def calc_consumption_per_active_hour(df_data, column):
    """Return monthly active hours and consumption per active hour.

    A sample is active when ``df_data[f'{column}_state'] >= 2``.  Every active
    sample contributes 0.5 hours (presumably because the data is sampled every
    30 minutes — confirm against the input files).

    Args:
        df_data: DataFrame with a DatetimeIndex holding ``column`` and
            ``<column>_state``.  It is not modified.
        column: Name of the consumption column.

    Returns:
        A tuple ``(monthly_active_hours, monthly_ratio)``: active hours per
        monthly period, and the sum of ``column`` over active samples divided
        by those hours (rounded to 2 decimals).
    """
    is_active = (df_data[f'{column}_state'] >= 2).rename('is_active')
    monthly_active_hours = is_active.groupby(is_active.index.to_period('M')).sum() * 0.5

    # Positional boolean mask: selects the active rows without writing a
    # helper column into the caller's frame.
    active_values = df_data[column][is_active.to_numpy()]
    monthly_active_consumption = active_values.groupby(
        active_values.index.to_period('M')
    ).sum()

    monthly_ratio = monthly_active_consumption / monthly_active_hours
    return monthly_active_hours, monthly_ratio.round(2)

# --- 6. Consumption per active hour + monthly active probability per 3-hour bin ---
def calc_consumption_per_active_hour(df_data, column):
    """Return monthly active hours, consumption per active hour and active probability.

    A sample is active when ``df_data[f'{column}_state'] >= 2``.  Every active
    sample contributes 0.5 hours (presumably because the data is sampled every
    30 minutes — confirm against the input files).

    Args:
        df_data: DataFrame with a DatetimeIndex holding ``column`` and
            ``<column>_state``.  It is not modified.
        column: Name of the consumption column.

    Returns:
        A tuple ``(monthly_active_hours, monthly_ratio, hourly_active_probability)``:

        * active hours per monthly period,
        * sum of ``column`` over active samples divided by those hours,
          rounded to 2 decimals,
        * DataFrame indexed by month with one column per 3-hour bin
          (``'0-3'`` ... ``'21-24'``) holding the fraction of active samples,
          rounded to 2 decimals.
    """
    index = df_data.index

    # Active flag, kept as a standalone Series so that no helper columns are
    # written into the caller's frame.
    is_active = (df_data[f'{column}_state'] >= 2).rename('is_active')

    # Active hours per month (0.5 h per active sample)
    monthly_active_hours = is_active.groupby(index.to_period('M')).sum() * 0.5

    # Consumption during active samples, per month
    active_values = df_data[column][is_active.to_numpy()]
    monthly_active_consumption = active_values.groupby(
        active_values.index.to_period('M')
    ).sum()

    # Active consumption / active hours, per month
    monthly_ratio = monthly_active_consumption / monthly_active_hours

    # --- Active probability per month and 3-hour bin ---
    month_key = index.to_period('M').rename('month')
    hour_bin = pd.Index((index.hour // 3) * 3, name='hour_bin')  # 0, 3, 6, ..., 21

    # Number of active samples and total samples per (month, bin)
    grouped = is_active.groupby([month_key, hour_bin])
    active_counts = grouped.sum()
    total_counts = grouped.count()

    hourly_active_probability = (active_counts / total_counts).unstack(level=1).round(2)

    # Human-readable column labels
    hourly_active_probability.columns = [
        f'{h}-{h+3}' for h in hourly_active_probability.columns
    ]

    return monthly_active_hours, monthly_ratio.round(2), hourly_active_probability




In [16]:
# Analysis settings: houses, inclusive time window and columns to analyse.
house_list = [156]
start_date = '2024-04-01 00:00:00'
end_date = '2024-06-30 23:30:00'
col_list = ['electric_demand', 'LD', 'kitchen', 'bedroom', 'washing_machine', 'dishwasher']


for house_num in house_list:
    print(f"\n=== {house_num}号地 の分析 ===")
    # Give load_data a copy and keep its result under a separate name, so that
    # columns dropped for one house are still attempted for the next house.
    df_data, house_cols = load_data(house_num, start_date, end_date, list(col_list))
    thresholds_csv = load_thresholds(house_num, house_cols)
    for column in house_cols:
        time_bin_monthly = calc_time_bin_consumption(df_data, column)
        print(f"時刻別平均消費量: \n{time_bin_monthly}")
        active_hours, consumption_per_active_hour, hourly_active_probability = calc_consumption_per_active_hour(df_data, column)
        print(f"=== {column} の分析結果 ===")
        print(f"稼働時間: {active_hours}")
        print(f"稼働時間あたりの消費量: {consumption_per_active_hour}")
        print(f"時刻別稼働確率: \n{hourly_active_probability}")


=== 156号地 の分析 ===
時刻別平均消費量: 
            0-3     3-6     6-9    9-12   12-15   15-18   18-21   21-24
time                                                                   
2024-04  238.18  108.10  160.81  164.98  150.18  155.42  324.89  284.81
2024-05  224.87  112.27  164.71  153.11  155.32  168.99  290.01  265.45
2024-06  215.70  108.52  175.77  159.51  153.12  194.69  292.07  292.61
=== electric_demand の分析結果 ===
稼働時間: time
2024-04    568.0
2024-05    600.0
2024-06    617.0
Freq: M, Name: is_active, dtype: float64
稼働時間あたりの消費量: time
2024-04    465.91
2024-05    441.78
2024-06    441.28
Freq: M, dtype: float64
時刻別稼働確率: 
          0-3   3-6   6-9  9-12  12-15  15-18  18-21  21-24
month                                                      
2024-04  0.94  0.43  0.63  0.79   0.81   0.70   1.00    1.0
2024-05  0.97  0.55  0.73  0.74   0.68   0.80   0.99    1.0
2024-06  0.89  0.57  0.87  0.86   0.81   0.84   1.00    1.0
時刻別平均消費量: 
           0-3   3-6    6-9   9-12  12-15  15-18   18-21   2

In [None]:
# Analysis settings: houses, inclusive time window and columns to analyse.
house_list = [156]
start_date = '2024-04-01 00:00:00'
end_date = '2024-06-30 23:30:00'
col_list = ['electric_demand', 'LD', 'kitchen', 'bedroom', 'washing_machine', 'dishwasher']

# Create the output directory once, before any CSV is written.
os.makedirs('./output_feature', exist_ok=True)

for house_num in house_list:
    print(f"\n=== {house_num}号地 の分析 ===")
    # Give load_data a copy and keep its result under a separate name, so that
    # columns dropped for one house are still attempted for the next house.
    df_data, house_cols = load_data(house_num, start_date, end_date, list(col_list))
    thresholds_csv = load_thresholds(house_num, house_cols)
    for column in house_cols:
        print(f"\n--- {column} の分析結果 ---")
        total_annual, total_monthly = calc_mean_consumption(df_data, column)
        time_bin_monthly = calc_time_bin_consumption(df_data, column)
        pcr_monthly = calc_pcr(df_data, column)
        day_night_ratio = calc_day_night_ratio(df_data, column)
        weekday_weekend_ratio = calc_weekday_weekend_ratio(df_data, column)
        # The definition of calc_consumption_per_active_hour that is in effect
        # returns a third value (active probability per time bin); unpacking
        # only two names raised "too many values to unpack".  The starred
        # target accepts both the two-value and the three-value form.
        active_hours, consumption_per_active_hour, *_ = calc_consumption_per_active_hour(df_data, column)

        # --- Show results ---
        print("1. 平均消費量（年間）:", total_annual)

        # Collect the monthly metrics in a single DataFrame.
        # NOTE: 'total_consumption' holds the monthly *mean* returned by
        # calc_mean_consumption; the column name is kept so that the CSV
        # layout stays unchanged.
        df_monthly_result = pd.DataFrame({
            'total_consumption': total_monthly,
            'PCR': pcr_monthly,
            'day_night_ratio': day_night_ratio,
            'weekday_weekend_ratio': weekday_weekend_ratio,
            'consumption_per_active_hour': consumption_per_active_hour
        })
        # The per-time-bin means come as a separate DataFrame: prefix its
        # column names and join it on the month index.
        time_bin_monthly.columns = [f'time_bin_{col}' for col in time_bin_monthly.columns]
        df_monthly_result = df_monthly_result.join(time_bin_monthly)
        # Write to CSV
        df_monthly_result.to_csv(f'./output_feature/{house_num}_{column}_energy_metrics.csv', index_label='month')

    print("\tcomplete monthly metrics output to CSV.")




=== 156号地 の分析 ===

--- electric_demand の分析結果 ---
1. 平均消費量（年間）: 196.37

--- LD の分析結果 ---
1. 平均消費量（年間）: 63.47

--- kitchen の分析結果 ---
1. 平均消費量（年間）: 25.36

--- bedroom の分析結果 ---
1. 平均消費量（年間）: 9.61

--- washing_machine の分析結果 ---
1. 平均消費量（年間）: 1.03

--- dishwasher の分析結果 ---
1. 平均消費量（年間）: 22.15
	complete monthly metrics output to CSV.
