In [106]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime, timedelta

from sklearn.model_selection import train_test_split
import glob
import os
import time
import random
import warnings; warnings.filterwarnings("ignore")
from IPython.display import Image
import pickle
from tqdm import tqdm
import platform
from itertools import combinations
from scipy.stats.mstats import gmean

from tqdm.notebook import tqdm
from pycaret.regression import *
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [151]:
#data = pd.read_csv("smart_farm_final.csv").sample(n = 50000)
org_data = pd.read_csv("smart_farm_final.csv")
org_data.shape

(148168, 57)

In [152]:
data = org_data.drop(["시간","분","년월일"],axis=1)
data.drop(["차열스크린_광도조절","차광스크린_광도조절"],axis=1,inplace=True)

In [153]:
data["팬코일 B동"] = data["팬코일 B동"].apply(lambda x : 1 if x == 0.5 else x )

In [154]:
train = data.iloc[:129448,:]
test = data.iloc[129448:,:]

문자열 칼럼 추출

In [155]:
# 문자열 형식의 컬럼만 추출하기
object_lst = []

# 숫자 형식의 컬럼만 추출하기
num_lst = []

for col in data.columns:
    if data[col].dtype == "object":
        object_lst.append(col)
    
    else:
        num_lst.append(col)

In [156]:
# 카테고리열 제거한 numerical
# 진짜 숫자 관련 열만 존재

num_lst_real = [x for x in num_lst if x not in ['배기팬','유동팬','팬코일 B동', "차광스크린_개방","열공급량(kWh)"]]

object_lst_real = object_lst + ['배기팬','유동팬','팬코일 B동', "차광스크린_개방"]

object_no_binary = object_lst.copy()

In [157]:
data["차광스크린_개방"].value_counts()

0    134934
1     13234
Name: 차광스크린_개방, dtype: int64

In [158]:
object_no_binary

['차광스크린_개방조절',
 '차열스크린_온도조절',
 '차열스크린_개방조절',
 '시간범주',
 '일몰전후',
 '일출일몰',
 '오존등급',
 '아황산가스등급',
 '이산화질소등급',
 'PM10등급',
 'PM25등급']

In [159]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [160]:
for val in ['일사량(W/㎡)','외기온(℃)','외기습도(%)','풍속(m/s)', '풍향(º)','내부 온도','내부 습도','차광스크린(수평)','차열스크린(수평)',
 '에너지스크린(수평)','에너지스크린(수직)','천창','난방온도(설정온도)','누적강수량','현지기압','해면기압','PM10_1시간','PM25_1시간',
 '오존_1시간','이산화질소_1시간','일산화탄소_1시간','아황산가스_1시간','지면온도','환기온도(천창 제어온도)']:
    
    minmax = MinMaxScaler()
    
    data_scaled = minmax.fit_transform(data[val].values.reshape(-1,1))
    data[val] = data_scaled

In [161]:
for col in ["일조","년도","월",'일출시간', '일몰시간', '일출3시간후',
       '일출1시간후', '일몰3시간후', '일몰1시간반전', '하루길이']:
    
    minmax = MinMaxScaler()
    
    data_scaled = minmax.fit_transform(data[col].values.reshape(-1,1))
    data[col] = data_scaled

#### 1.1 데이터 처리하기
- 학습용 데이터 : ~2022.03
- 예측용 데이터 : 2022.03~

In [167]:
data_scaled = data.copy()

In [168]:
object_lst_real

['차광스크린_개방조절',
 '차열스크린_온도조절',
 '차열스크린_개방조절',
 '시간범주',
 '일몰전후',
 '일출일몰',
 '오존등급',
 '아황산가스등급',
 '이산화질소등급',
 'PM10등급',
 'PM25등급',
 '배기팬',
 '유동팬',
 '팬코일 B동',
 '차광스크린_개방']

In [169]:
Y = data_scaled["열공급량(kWh)"]

X_dummy = pd.get_dummies(data_scaled.drop(["열공급량(kWh)"],axis=1)
                         ,columns = object_lst + ["일","시"])

In [170]:
data_scaled_final = pd.concat([X_dummy, Y], axis = 1)

train_data = data_scaled_final.iloc[:129448,:]

test_data = data_scaled_final.iloc[129448:,:]

In [172]:
data_scaled_final.to_csv("smart_farm_final_scaled.csv",encoding="utf-8-sig",index=False)

In [166]:
data_scaled_final

Unnamed: 0,년도,월,시,일사량(W/㎡),외기온(℃),외기습도(%),풍속(m/s),풍향(º),내부 온도,내부 습도,차광스크린(수평),차열스크린(수평),에너지스크린(수평),에너지스크린(수직),배기팬,천창,유동팬,팬코일 B동,환기온도(천창 제어온도),난방온도(설정온도),누적강수량,현지기압,해면기압,일조,PM10_1시간,PM25_1시간,오존_1시간,이산화질소_1시간,일산화탄소_1시간,아황산가스_1시간,차광스크린_개방,일출시간,일몰시간,일출3시간후,일출1시간후,일몰3시간후,일몰1시간반전,하루길이,지면온도,차광스크린_개방조절_0%~30%개방,차광스크린_개방조절_100%개방,차광스크린_개방조절_30%~80%개방,차광스크린_개방조절_80%~100%개방,차열스크린_온도조절_23도~25도,차열스크린_온도조절_23도미만,차열스크린_온도조절_25도~26도,차열스크린_온도조절_26도이상,차열스크린_개방조절_0%~30%개방,차열스크린_개방조절_100%개방,차열스크린_개방조절_30%~70%개방,차열스크린_개방조절_70%~100%개방,시간범주_그외,시간범주_야간,시간범주_주간,일몰전후_일몰1시간반_전,일몰전후_일몰1시간반_후,일출일몰_일몰,일출일몰_일출,오존등급_보통,오존등급_좋음,아황산가스등급_보통,아황산가스등급_좋음,이산화질소등급_나쁨,이산화질소등급_보통,이산화질소등급_좋음,PM10등급_나쁨,PM10등급_보통,PM10등급_좋음,PM25등급_나쁨,PM25등급_보통,PM25등급_좋음,일_1,일_2,일_3,일_4,일_5,일_6,일_7,일_8,일_9,일_10,일_11,일_12,일_13,일_14,일_15,일_16,일_17,일_18,일_19,일_20,일_21,일_22,일_23,일_24,일_25,일_26,일_27,일_28,일_29,일_30,일_31,열공급량(kWh)
0,0.0,0.909091,0,0.0,0.495238,0.643678,0.000000,0.901408,0.359322,0.485804,0.0,0.0,0.0,0.0,0,0.0,1,0.0,0.333333,0.5,0.0,0.506494,0.506452,0.757607,0.264957,0.283784,0.023810,0.320513,0.142857,0.04,0,0.771429,0.016129,0.771429,0.771429,0.016129,0.012195,0.219231,0.254545,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.9
1,0.0,0.909091,0,0.0,0.495238,0.643678,0.024242,0.954930,0.352542,0.507886,0.0,0.0,0.0,0.0,0,0.0,1,0.0,0.333333,0.5,0.0,0.506494,0.506452,0.000000,0.264957,0.283784,0.023810,0.320513,0.142857,0.04,0,0.771429,0.016129,0.771429,0.771429,0.016129,0.012195,0.219231,0.254545,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.9
2,0.0,0.909091,0,0.0,0.492063,0.643678,0.054545,0.281690,0.338983,0.536278,0.0,0.0,0.0,0.0,0,0.0,1,1.0,0.333333,0.5,0.0,0.503247,0.503226,0.000000,0.264957,0.283784,0.023810,0.320513,0.142857,0.04,0,0.771429,0.016129,0.771429,0.771429,0.016129,0.012195,0.219231,0.254545,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.9
3,0.0,0.909091,0,0.0,0.492063,0.643678,0.078788,0.332394,0.338983,0.515773,0.0,0.0,0.0,0.0,0,0.0,1,1.0,0.333333,0.5,0.0,0.503247,0.503226,0.000000,0.264957,0.283784,0.023810,0.320513,0.142857,0.04,0,0.771429,0.016129,0.771429,0.771429,0.016129,0.012195,0.219231,0.254545,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.8
4,0.0,0.909091,0,0.0,0.492063,0.643678,0.054545,0.332394,0.335593,0.528391,0.0,0.0,0.0,0.0,0,0.0,1,1.0,0.333333,0.5,0.0,0.503247,0.503226,0.000000,0.264957,0.283784,0.023810,0.320513,0.142857,0.04,0,0.771429,0.016129,0.771429,0.771429,0.016129,0.012195,0.219231,0.254545,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148163,1.0,0.181818,23,0.0,0.507937,0.574713,0.024242,0.954930,0.367797,0.669558,0.0,0.0,0.0,0.0,0,0.0,1,0.0,0.333333,0.5,0.0,0.493506,0.493548,0.824021,0.111111,0.162162,0.464286,0.076923,0.071429,0.08,0,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,1.000000,0.292929,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
148164,1.0,0.181818,23,0.0,0.507937,0.574713,0.000000,0.954930,0.367797,0.657098,0.0,0.0,0.0,0.0,0,0.0,1,0.0,0.333333,0.5,0.0,0.493506,0.493548,0.824021,0.111111,0.162162,0.464286,0.076923,0.071429,0.08,0,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,1.000000,0.292929,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
148165,1.0,0.181818,23,0.0,0.504762,0.574713,0.024242,0.954930,0.364407,0.676498,0.0,0.0,0.0,0.0,0,0.0,1,0.0,0.333333,0.5,0.0,0.493506,0.493548,0.824021,0.111111,0.162162,0.464286,0.076923,0.071429,0.08,0,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,1.000000,0.292929,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
148166,1.0,0.181818,23,0.0,0.504762,0.574713,0.024242,0.960563,0.366102,0.669716,0.0,0.0,0.0,0.0,0,0.0,1,0.0,0.333333,0.5,0.0,0.496753,0.496774,0.824021,0.111111,0.162162,0.464286,0.076923,0.071429,0.08,0,0.000000,1.000000,0.000000,0.000000,1.000000,1.000000,1.000000,0.292929,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
