In [41]:
# 작업 환경 설정
import matplotlib.dates as md
import pandas as pd
import numpy as np

%matplotlib inline

#### 날씨 데이터 불러오기
- ASOS 관측소의 데이터만 사용
- area 184(제주), 185(고산), 188(성산), 189(서귀포)

In [20]:
# Load the raw weather observations and keep only the rows whose "station" is "ASOS".
# .copy() detaches the filtered rows from the raw frame, so columns can be
# assigned on the result without pandas raising SettingWithCopyWarning.
weather_df = pd.read_csv("weather_v2.csv")
weather_df = weather_df[weather_df["station"] == "ASOS"].copy()

##### datetime 컬럼을 date와 time으로 나누기

In [25]:
def split_date(data, col):
    """Split a space-separated string column into its first two pieces.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the column to split.
    col : str
        Name of a string column whose values are split on a single space.

    Returns
    -------
    tuple of pandas.Series
        ``(date, time)``: the first and the second space-separated token
        of every value in ``data[col]``.
    """
    pieces = data[col].str.split(' ')
    return pieces.str[0], pieces.str[1]

In [26]:
# Break the combined "datetime" string into separate "date" and "time" columns.
date_part, time_part = split_date(weather_df, "datetime")
weather_df["date"] = date_part
weather_df["time"] = time_part

##### 날씨 데이터에서 사용할 데이터만 추출
- 온도, 풍속, 풍향, 습도(상대) 사용
- 체감온도 및 불쾌지수 추가
    - 체감온도
        - 기상자료개방포털 자료 활용 (각 관측소 184(제주), 185(고산), 188(성산), 189(서귀포)에서 측정된 일간 체감온도)
    - 불쾌지수
        - 0.81 * 섭씨온도 + 0.01 * 상대습도(%) * (0.99 * 섭씨온도 - 14.3) + 46.3
        
    - 자료 출처 : 체감온도 - 기상청, 불쾌지수 - 위키백과

In [28]:
# Keep only the columns used downstream. .copy() makes this an independent
# frame, so later column assignments on it do not raise SettingWithCopyWarning.
weather_useful = weather_df[["date", "temp", "ws", "wd", "humid"]].copy()

In [29]:
# Convert the "date" column to datetime. .assign returns a new frame, so the
# conversion never writes into a slice of another DataFrame.
weather_useful = weather_useful.assign(date=pd.to_datetime(weather_useful["date"]))

In [74]:
# Discomfort index, rounded to one decimal place.
def discomfort_index(temp, rh):
    """Compute the discomfort index from temperature and relative humidity.

    Formula: ``0.81 * temp + 0.01 * rh * (0.99 * temp - 14.3) + 46.3``.

    Parameters
    ----------
    temp : number or array-like
        Temperature (the notebook's notes give it in degrees Celsius).
    rh : number or array-like
        Relative humidity (the notebook's notes give it in percent).

    Returns
    -------
    Same kind as the inputs, rounded to one decimal place.
    """
    humidity_term = 0.01 * rh * (0.99 * temp - 14.3)
    raw_index = 0.81 * temp + humidity_term + 46.3
    return round(raw_index, 1)

In [77]:
# Add the discomfort index as the "angry" column.
weather_useful["angry"] = discomfort_index(
    temp=weather_useful["temp"],
    rh=weather_useful["humid"],
)

In [78]:
# Collapse the observations to one row per date by averaging every other column.
# NOTE(review): "wd" is averaged arithmetically here; if it holds wind direction
# in degrees, a circular mean may be more appropriate — TODO confirm.
weather_useful = weather_useful.groupby("date", as_index=False).mean()

In [79]:
# Display the weather table for inspection.
weather_useful

Unnamed: 0,date,temp,ws,wd,humid,angry
0,2018-02-01,4.346739,3.859783,201.195652,60.119565,43.8
1,2018-02-02,3.760417,4.320833,276.770833,55.281250,43.5
2,2018-02-03,1.337500,6.854167,314.895833,67.281250,38.7
3,2018-02-04,-1.017708,7.000000,313.958333,72.010417,34.5
4,2018-02-05,-0.077083,6.506250,304.687500,71.239583,36.0
...,...,...,...,...,...,...
833,2020-05-14,18.710417,2.708333,174.166667,56.291667,63.8
834,2020-05-15,19.930208,3.497917,208.125000,91.500000,67.4
835,2020-05-16,17.560417,1.961458,205.000000,96.583333,63.5
836,2020-05-17,17.070833,2.247917,176.770833,91.635417,62.5


In [80]:
# Load the apparent-temperature table and discard the "Unnamed: 0" column.
body_temp = pd.read_csv("body_temp.csv").drop(columns="Unnamed: 0")

In [83]:
# Daily apparent temperature averaged over the four stations.
# Select the column before aggregating so any non-numeric column in body_temp
# cannot make mean() fail.
body_temp_daily = body_temp.groupby("date")["body_temp"].mean().reset_index()
# The assignment below aligns the two tables by row index, not by date, so fail
# loudly if they do not even have the same number of daily rows.
assert len(body_temp_daily) == len(weather_useful), (
    "body_temp and weather_useful cover a different number of days"
)
# NOTE(review): this still assumes both tables list the same dates in the same
# order — TODO confirm, or join on "date" once the date formats are known to match.
weather_useful["body_temp"] = body_temp_daily["body_temp"]

In [84]:
# Display the weather table for inspection.
weather_useful

Unnamed: 0,date,temp,ws,wd,humid,angry,body_temp
0,2018-02-01,4.346739,3.859783,201.195652,60.119565,43.8,1.900
1,2018-02-02,3.760417,4.320833,276.770833,55.281250,43.5,0.950
2,2018-02-03,1.337500,6.854167,314.895833,67.281250,38.7,-2.775
3,2018-02-04,-1.017708,7.000000,313.958333,72.010417,34.5,-5.750
4,2018-02-05,-0.077083,6.506250,304.687500,71.239583,36.0,-4.300
...,...,...,...,...,...,...,...
833,2020-05-14,18.710417,2.708333,174.166667,56.291667,63.8,18.650
834,2020-05-15,19.930208,3.497917,208.125000,91.500000,67.4,19.850
835,2020-05-16,17.560417,1.961458,205.000000,96.583333,63.5,17.425
836,2020-05-17,17.070833,2.247917,176.770833,91.635417,62.5,17.000


In [85]:
# Print the column dtypes and non-null counts of the weather table.
weather_useful.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 838 entries, 0 to 837
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       838 non-null    datetime64[ns]
 1   temp       838 non-null    float64       
 2   ws         838 non-null    float64       
 3   wd         838 non-null    float64       
 4   humid      838 non-null    float64       
 5   angry      838 non-null    float64       
 6   body_temp  838 non-null    float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 46.0 KB


##### target 데이터 불러오기
- 일별 최대 smp, 최소 smp, 평균 smp, 전력 수요량

In [88]:
# Load the target table and convert its "date" column from str to datetime.
target = pd.read_csv("target_v2.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

In [89]:
# Inner-join the weather features onto the target table.
# No join key is given, so pandas joins on every column name the two frames share.
target = pd.merge(target, weather_useful, how="inner")

In [90]:
# Display the merged target table for inspection.
target

Unnamed: 0,date,smp_max,smp_min,smp_mean,supply,temp,ws,wd,humid,angry,body_temp
0,2018-02-01,150.65,116.84,132.71,87.47,4.346739,3.859783,201.195652,60.119565,43.8,1.900
1,2018-02-02,163.86,116.84,134.19,86.64,3.760417,4.320833,276.770833,55.281250,43.5,0.950
2,2018-02-03,164.07,116.85,131.39,88.28,1.337500,6.854167,314.895833,67.281250,38.7,-2.775
3,2018-02-04,171.00,115.76,131.89,86.14,-1.017708,7.000000,313.958333,72.010417,34.5,-5.750
4,2018-02-05,170.34,123.89,137.96,90.63,-0.077083,6.506250,304.687500,71.239583,36.0,-4.300
...,...,...,...,...,...,...,...,...,...,...,...
833,2020-05-14,193.28,66.78,100.46,62.70,18.710417,2.708333,174.166667,56.291667,63.8,18.650
834,2020-05-15,198.23,61.81,102.38,64.91,19.930208,3.497917,208.125000,91.500000,67.4,19.850
835,2020-05-16,220.91,88.50,121.19,61.75,17.560417,1.961458,205.000000,96.583333,63.5,17.425
836,2020-05-17,207.75,65.78,116.82,61.55,17.070833,2.247917,176.770833,91.635417,62.5,17.000
