In [1]:
import pandas as pd
import numpy as np
import random
import os

from tqdm import tqdm
from autogluon.tabular import TabularDataset, TabularPredictor


train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')
sample_submission = pd.read_csv('./sample_submission.csv')
building_df = pd.read_csv('./building_info.csv')

from datetime import datetime, timedelta

def classify_weekdays(start_date, end_date):
    date_format = "%Y-%m-%d"
    start = datetime.strptime(start_date, date_format)
    end = datetime.strptime(end_date, date_format)
    
    classification = []
    current_date = start
    
    while current_date <= end:
        if current_date.weekday() < 5:  # 0: 월요일, 1: 화요일, ..., 4: 금요일
            classification.append(0)  # 평일
        else:
            classification.append(1)  # 주말
        
        current_date += timedelta(days=1)
    
    return classification

# 예시 데이터 프레임 생성
data = {'date': pd.date_range(start='2022-06-01', end='2022-08-24')}
train = pd.DataFrame(data)
t_data = {'date': pd.date_range(start='2022-08-25', end='2022-08-31')}
test = pd.DataFrame(t_data)

# 주말/평일 분류한 결과를 'holidays' 열에 추가
start_date = "2022-06-01"
end_date = "2022-08-24"
test_start_date = "2022-08-25"
test_end_date = "2022-08-31"
train['holidays'] = classify_weekdays(start_date, end_date)
train_df['holidays'] = train['holidays']
test['holidays'] = classify_weekdays(test_start_date, test_end_date)
test_df['holidays'] = test['holidays']
#added
train_df['일시'] = pd.to_datetime(train_df['일시'], format='%Y%m%d %H')
test_df['일시'] = pd.to_datetime(test_df['일시'], format='%Y%m%d %H')

# date time feature 생성
train_df['hour'] = train_df['일시'].dt.hour
train_df['day'] = train_df['일시'].dt.day
train_df['month'] = train_df['일시'].dt.month
train_df['year'] = train_df['일시'].dt.year

test_df['hour'] = test_df['일시'].dt.hour
test_df['day'] = test_df['일시'].dt.day
test_df['month'] = test_df['일시'].dt.month
test_df['year'] = test_df['일시'].dt.year

train_df['sin_time'] = np.sin(2*np.pi*train_df.hour/24)
train_df['cos_time'] = np.cos(2*np.pi*train_df.hour/24)

test_df['sin_time'] = np.sin(2*np.pi*test_df.hour/24)
test_df['cos_time'] = np.cos(2*np.pi*test_df.hour/24)
train_df['THI'] = 9/5*train_df['기온(C)'] - 0.55*(1-train_df['습도(%)']/100)*(9/5*train_df['습도(%)']-26)+32
test_df['THI'] = 9/5*test_df['기온(C)'] - 0.55*(1-test_df['습도(%)']/100)*(9/5*test_df['습도(%)']-26)+32
##############################################################


train_df = train_df.drop(columns = ['일조(hr)', '일사(MJ/m2)', '강수량(mm)', '풍속(m/s)'])
test_df = test_df.drop(columns = ['강수량(mm)', '풍속(m/s)'])
train_data = TabularDataset(train_df)
train_data.head()

label = '전력소비량(kWh)'
####################################################

# AutoGluon 모델 학습 및 결과 저장
predictor = TabularPredictor(label=label, problem_type='regression').fit(
                            train_data, presets=['best_quality'], auto_stack=True,
                            num_bag_folds=10, num_bag_sets=20, num_stack_levels=3, refit_full=True)

# 가장 좋은 모델 정보 출력
leaderboard = predictor.leaderboard()
print(leaderboard)

# 선택된 최적 모델 정보 출력
best_model = leaderboard.iloc[0]['model']
print("Best Model:", best_model)

# 선택된 최적 모델의 성능 정보 출력
best_model_info = predictor.get_model_best()
print("Best Model Info:", best_model_info)

test_data = TabularDataset(test_df)

y_pred = predictor.predict(test_data)
x = pd.DataFrame(y_pred)
sample_submission['answer'] = x['전력소비량(kWh)']
print(sample_submission)
sample_submission.to_csv('./baseline_submission_org.csv', index=False)


  from .autonotebook import tqdm as notebook_tqdm
No path specified. Models will be saved in: "AutogluonModels\ag-20230821_225132\"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=10, num_bag_sets=20
	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels\ag-20230821_225132\"
AutoGluon Version:  0.8.2
Python Version:     3.9.13
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.22000
Disk Space Avail:   25.20 GB / 511.33 GB (4.9%)
Train Data Rows:    204000
Train Data Columns: 13
Label Column: 전력소비량(kWh)
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    7751.52 MB
	Trai

                          model    score_val  pred_time_val    fit_time  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0           WeightedEnsemble_L3  -128.394605      20.428010  161.196516                0.002827           1.068730            3       True          8
1          ExtraTreesMSE_BAG_L2  -128.437743      14.648240   60.588385                5.102211          16.640004            2       True          7
2           WeightedEnsemble_L4  -130.726208      31.088112  243.651568                0.003007           1.119422            4       True         11
3          ExtraTreesMSE_BAG_L3  -130.813888      25.346287  174.420966                4.921104          14.293180            3       True         10
4        RandomForestMSE_BAG_L2  -130.892380      15.322972  143.487782                5.776943          99.539400            2       True          6
5           WeightedEnsemble_L2  -132.037970       8.184768   44.727038                0.005276     