In [97]:
# 작업 환경 설정
import matplotlib.dates as md
import pandas as pd
import numpy as np

%matplotlib inline

#### 날씨 데이터 불러오기
- ASOS 관측소의 데이터만 사용
- area 184(제주), 185(고산), 188(성산), 189(서귀포)

In [98]:
# Load the raw weather observations and keep only the rows whose `station`
# value is "ASOS".
weather_df = pd.read_csv("weather_v2.csv")
# .copy() makes the filtered result an independent frame, so adding columns to
# it afterwards does not raise SettingWithCopyWarning.
weather_df = weather_df[weather_df["station"] == "ASOS"].copy()

##### datetime 컬럼을 date와 time으로 나누기

In [99]:
def split_date(data, col):
    """Split a space-separated string column into its first two tokens.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the column to split.
    col : str
        Name of a string column; every value is split on a single space.

    Returns
    -------
    tuple of pandas.Series
        ``(date, time)`` - the first and the second space-separated token of
        each row, in that order.
    """
    tokens = data[col].str.split(' ')
    return tokens.str[0], tokens.str[1]

In [100]:
# Break the raw "datetime" strings into separate "date" and "time" columns.
date_part, time_part = split_date(weather_df, "datetime")
weather_df["date"] = date_part
weather_df["time"] = time_part

##### 날씨 데이터에서 사용할 데이터만 추출
- 온도, 풍속, 풍향, 습도(상대) 사용
- 체감온도 및 불쾌지수 추가
    - 체감온도
        - 기상자료개방포털 자료 활용 (각 관측소 184(제주), 185(고산), 188(성산), 189(서귀포)에서 측정된 일간 체감온도)
    - 불쾌지수
        - $DI = 0.81\,T + 0.01\,RH\,(0.99\,T - 14.3) + 46.3$ (T: 섭씨온도(°C), RH: 상대습도(%))
        
    - 자료 출처 : 체감온도 - 기상청, 불쾌지수 - 위키백과

In [101]:
# Keep only the columns used downstream. .copy() detaches the selection from
# weather_df so that later column assignments on weather_useful do not raise
# SettingWithCopyWarning.
weather_useful = weather_df[["date", "temp", "ws", "wd", "humid"]].copy()

In [102]:
# Parse the "date" strings into datetime values. assign() returns a new frame
# rather than setting a column in place, so this works without
# SettingWithCopyWarning even when weather_useful is a selection of another
# frame.
weather_useful = weather_useful.assign(date=pd.to_datetime(weather_useful["date"]))

In [103]:
def discomfort_index(temp, rh):
    """Compute the discomfort index, rounded to one decimal place.

    Formula: ``0.81 * temp + 0.01 * rh * (0.99 * temp - 14.3) + 46.3``.

    Parameters
    ----------
    temp : float or pandas.Series
        Air temperature in degrees Celsius.
    rh : float or pandas.Series
        Relative humidity in percent.

    Returns
    -------
    float or pandas.Series
        The discomfort index, same shape as the inputs.
    """
    temperature_term = 0.81 * temp
    humidity_term = 0.01 * rh * (0.99 * temp - 14.3)
    return round(temperature_term + humidity_term + 46.3, 1)

In [104]:
# Add the discomfort index as the "angry" column, computed row by row from
# temperature and relative humidity.
weather_useful["angry"] = discomfort_index(
    temp=weather_useful["temp"],
    rh=weather_useful["humid"],
)

In [105]:
# Collapse the observations to one row per date by averaging every remaining
# column, then move "date" back from the index into a regular column.
# NOTE(review): `wd` (wind direction, degrees) is averaged arithmetically here;
# a circular mean may be more appropriate for an angle - confirm.
weather_useful = (
    weather_useful
    .groupby("date")
    .mean()
    .reset_index()
)

In [107]:
# Round the daily averages to one decimal place, then display the frame.
rounded_cols = ["temp", "ws", "wd", "humid", "angry"]
weather_useful[rounded_cols] = weather_useful[rounded_cols].round(1)
weather_useful

Unnamed: 0,date,temp,ws,wd,humid,angry
0,2018-02-01,4.3,3.9,201.2,60.1,43.7
1,2018-02-02,3.8,4.3,276.8,55.3,43.5
2,2018-02-03,1.3,6.9,314.9,67.3,38.6
3,2018-02-04,-1.0,7.0,314.0,72.0,34.4
4,2018-02-05,-0.1,6.5,304.7,71.2,35.9
...,...,...,...,...,...,...
833,2020-05-14,18.7,2.7,174.2,56.3,63.7
834,2020-05-15,19.9,3.5,208.1,91.5,67.3
835,2020-05-16,17.6,2.0,205.0,96.6,63.5
836,2020-05-17,17.1,2.2,176.8,91.6,62.4


In [108]:
# Load the apparent-temperature data and discard the "Unnamed: 0" column.
body_temp = pd.read_csv("body_temp.csv").drop(columns="Unnamed: 0")

In [109]:
# Daily apparent temperature, averaged over all rows sharing a date (per the
# original note, the file holds readings from four stations) and rounded to
# one decimal place.
# The "date" column is parsed to datetime so it can be matched against
# weather_useful["date"]; assumes pd.to_datetime can parse it - TODO confirm.
daily_body_temp = (
    body_temp
    .assign(date=pd.to_datetime(body_temp["date"]))
    .groupby("date")["body_temp"]
    .mean()
    .round(1)
    .reset_index()
)

# Join on the date key instead of relying on both frames having identical row
# order and date coverage: a positional assignment would silently attach
# values to the wrong day if either side were missing a date.
# Dropping any existing "body_temp" column first keeps the cell safe to re-run.
weather_useful = (
    weather_useful
    .drop(columns="body_temp", errors="ignore")
    .merge(daily_body_temp, on="date", how="left", validate="many_to_one")
)

In [110]:
# Show the daily weather table, now including the body_temp column.
weather_useful

Unnamed: 0,date,temp,ws,wd,humid,angry,body_temp
0,2018-02-01,4.3,3.9,201.2,60.1,43.7,1.9
1,2018-02-02,3.8,4.3,276.8,55.3,43.5,1.0
2,2018-02-03,1.3,6.9,314.9,67.3,38.6,-2.8
3,2018-02-04,-1.0,7.0,314.0,72.0,34.4,-5.8
4,2018-02-05,-0.1,6.5,304.7,71.2,35.9,-4.3
...,...,...,...,...,...,...,...
833,2020-05-14,18.7,2.7,174.2,56.3,63.7,18.6
834,2020-05-15,19.9,3.5,208.1,91.5,67.3,19.8
835,2020-05-16,17.6,2.0,205.0,96.6,63.5,17.4
836,2020-05-17,17.1,2.2,176.8,91.6,62.4,17.0


In [111]:
# Print the column dtypes and non-null counts of the daily weather table.
weather_useful.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 838 entries, 0 to 837
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       838 non-null    datetime64[ns]
 1   temp       838 non-null    float64       
 2   ws         838 non-null    float64       
 3   wd         838 non-null    float64       
 4   humid      838 non-null    float64       
 5   angry      838 non-null    float64       
 6   body_temp  838 non-null    float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 46.0 KB


##### target 데이터 불러오기
- 일별 최대 smp, 최소 smp, 평균 smp, 전력 수요량

In [112]:
# Load the target data and convert its "date" column from str to datetime.
target = (
    pd.read_csv("target_v2.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

In [None]:
# TODO: 풍향(wd), 상대습도(humid) 단위 변환 (아직 미구현)
# 풍향: 도(degree) -> 라디안, 상대습도: % -> 100으로 나누어 소수로 표기

In [113]:
# Combine the target data with the daily weather table. With no `on`/`how`
# given, pandas performs an inner join on every column name the two frames
# share (presumably only "date" - verify against target_v2.csv).
target = pd.merge(target, weather_useful)

In [114]:
# Reorder the columns: date, the smp columns, the weather columns, then supply.
column_order = [
    "date",
    "smp_max", "smp_min", "smp_mean",
    "temp", "ws", "wd", "humid", "angry", "body_temp",
    "supply",
]
target = target[column_order]
target.head()

Unnamed: 0,date,smp_max,smp_min,smp_mean,temp,ws,wd,humid,angry,body_temp,supply
0,2018-02-01,150.65,116.84,132.71,4.3,3.9,201.2,60.1,43.7,1.9,87.47
1,2018-02-02,163.86,116.84,134.19,3.8,4.3,276.8,55.3,43.5,1.0,86.64
2,2018-02-03,164.07,116.85,131.39,1.3,6.9,314.9,67.3,38.6,-2.8,88.28
3,2018-02-04,171.0,115.76,131.89,-1.0,7.0,314.0,72.0,34.4,-5.8,86.14
4,2018-02-05,170.34,123.89,137.96,-0.1,6.5,304.7,71.2,35.9,-4.3,90.63
