# VA input data (.csv) 생성

In [1]:
import os
import fnmatch
from pandas import read_csv
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


data_dir   = '../Data' #'C:/Users/VISLAB_PHY/Desktop/Workspace/Data'
date_start = '10190901'
date_end   = '30191201'
err_date_list =   [ '20190912',
                    '20191122',
                    '20191130',
                    '20191217',
                    '20200501',
                    '20200502',
                    '20191028',
                    '20191107',
                    '20191108',
                    '20191109',
                    '20191110',
                    '20191111',
                    '20191112',
                    '20200214',
                    '20200307',
                    '20200308',
                    '20200309',
                    '20200310',
                    '20200328',
                    '20200329',
                    '20200625',
                    '20200809']

### WEATHER .csv

In [39]:
#############################################
# 종관기상관측
#############################################
# pow 파일 load
file_list   = os.listdir(data_dir)
print(len(file_list))
for filename in os.listdir(data_dir):
    if fnmatch.fnmatch(filename, 'OBS_ASOS_TIM_*.csv'):
        print(filename)

        # load csv data
        dataset = read_csv(data_dir+'/'+filename, encoding='CP949')
        dataset.drop(['지점','지점명'], axis=1, inplace=True)
        dataset.drop(['기온 QC플래그','강수량 QC플래그','풍속 QC플래그','풍향 QC플래그','습도 QC플래그'], axis=1, inplace=True)
        dataset.drop(['현지기압 QC플래그','해면기압 QC플래그','일조 QC플래그','지면온도 QC플래그'], axis=1, inplace=True)
        dataset.drop(['5cm 지중온도(°C)','10cm 지중온도(°C)','20cm 지중온도(°C)','30cm 지중온도(°C)'], axis=1, inplace=True)
        dataset.drop(['3시간신적설(cm)','일사(MJ/m2)','운형(운형약어)','지면상태(지면상태코드)','현상번호(국내식)'], axis=1, inplace=True)

        # set column name
        dataset.columns = ['ymdhms', 'temprt', 'rain', 'wnd_spd', 'wnd_dir', 'humdt','steampressr',
                           'dewpnt', 'pressr','seapressr','sunshine','snow','cloud','cloud2','mincloud','visiblt','grd_temprt']

        # set NA data (관측값 0이 누적되어 결측된 경우. 0으로 세팅)
        dataset['rain'].fillna(0, inplace=True)     #강수량
        dataset['sunshine'].fillna(0, inplace=True) #일조
        dataset['snow'].fillna(0, inplace=True)     #적설량

        #일시 패턴 변환(2019-08-20 5:00 -> 2019082005)
        dataset['ymdhms'] = dataset['ymdhms'].str[0:4]+dataset['ymdhms'].str[5:7]+dataset['ymdhms'].str[8:10]+dataset['ymdhms'].str[11:13]
        # pow측정값 중 결측값 많은 일자 제거
        dataset = dataset[(dataset['ymdhms'].str[0:8]>=date_start) & (dataset['ymdhms'].str[0:8]<date_end)]
        for err_date in err_date_list:
            idx_err = dataset[dataset['ymdhms'].str.startswith(err_date)].index
            dataset = dataset.drop(idx_err)
            
        # save file (test용)
        dataset.to_csv(data_dir+"/va_weather.csv",mode='w',index=False)
        weather_dataset = dataset

13
OBS_ASOS_TIM_20200929025447.csv


### POW .csv

In [16]:
#############################################
# 태양광 전력
#############################################

# pow 파일 load
dir_path    = data_dir+"/pow_24/UR00000126_csv"
file_list   = os.listdir(dir_path)
print(len(file_list))
hrPow  = []    

# pow측정값 에러가 큰 일자 제거
for filename in file_list:
    if (filename[:-4] not in err_date_list):
        if ((filename[:-4]>=date_start) & (filename<date_end)):
            filedata = pd.read_csv(dir_path+'/'+filename).values[:,0]
            hrPow.append(filedata)

#낮시간 추출 (5~20시)
pow_dataset = pd.DataFrame(hrPow)
pow_dataset[23] = 0
#pow_dataset =pow_dataset.iloc[:,powhr_start:powhr_end+1]

# 결측값 보간, reshape
pow_dataset = pow_dataset.interpolate(method='linear')
pow_dataset = pow_dataset.values.reshape(-1,1)
pow_dataset = pd.DataFrame(pow_dataset)
pow_dataset.columns = ['pow']
pow_dataset.to_csv(data_dir+"/va_pow.csv",mode='w',index=False)


337


### TOTAL .csv

In [18]:
df_temp = np.concatenate((pow_dataset, weather_dataset),axis=-1)#(984,7)
#col_list =  col_list[0:data_dim]
col_list = dataset.columns.values
col_list = np.insert(col_list, 0, 'pow')

total_dataset = pd.DataFrame(data=df_temp, columns= [col_list])
total_dataset.to_csv(data_dir+"/va_total.csv",mode='w',index=False)

### PEARSON CORRELATION

In [56]:
# correlation 위한 minmax scaler



scaler = MinMaxScaler(feature_range = (0, 1))
scaled_total = scaler.fit_transform(total_dataset.values)
df_total = pd.DataFrame(scaled_total, columns=total_dataset.columns, index=list(total_dataset.index.values))

corr = df_total.corr('pearson')
corr = corr.iloc[0:1,:]
display(corr)
print(corr)
print(type(corr))
print(df_total)

Unnamed: 0,pow,ymdhms,temprt,rain,wnd_spd,wnd_dir,humdt,steampressr,dewpnt,pressr,seapressr,sunshine,snow,cloud,cloud2,mincloud,visiblt,grd_temprt
pow,1.0,0.101944,0.150462,-0.085057,0.385014,0.252846,-0.553287,-0.164608,-0.166242,0.054445,0.037591,0.739553,-0.028469,-0.249402,-0.248543,0.006577,0.343229,0.29424


     pow    ymdhms    temprt      rain   wnd_spd   wnd_dir     humdt  \
pow  1.0  0.101944  0.150462 -0.085057  0.385014  0.252846 -0.553287   

    steampressr    dewpnt    pressr seapressr  sunshine      snow     cloud  \
pow   -0.164608 -0.166242  0.054445  0.037591  0.739553 -0.028469 -0.249402   

       cloud2  mincloud   visiblt grd_temprt  
pow -0.248543  0.006577  0.343229    0.29424  
<class 'pandas.core.frame.DataFrame'>
      pow    ymdhms    temprt     rain   wnd_spd   wnd_dir     humdt  \
0     0.0  0.000000  0.741117  0.00000  0.019231  0.000000  0.988636   
1     0.0  0.000001  0.741117  0.00000  0.009615  0.000000  0.977273   
2     0.0  0.000002  0.730964  0.00000  0.038462  0.000000  0.988636   
3     0.0  0.000003  0.713198  0.00000  0.028846  0.000000  0.988636   
4     0.0  0.000004  0.708122  0.00000  0.000000  0.000000  0.988636   
...   ...       ...       ...      ...       ...       ...       ...   
8011  0.0  0.999996  0.837563  0.00000  0.038462  0.000000  

In [54]:
result = corr.to_json(orient="records")
display(result)

'[{"(\'pow\',)":1.0,"(\'ymdhms\',)":0.1019436574,"(\'temprt\',)":0.1504616378,"(\'rain\',)":-0.0850566725,"(\'wnd_spd\',)":0.3850139107,"(\'wnd_dir\',)":0.2528462302,"(\'humdt\',)":-0.5532867373,"(\'steampressr\',)":-0.1646079651,"(\'dewpnt\',)":-0.1662420735,"(\'pressr\',)":0.0544445658,"(\'seapressr\',)":0.0375909448,"(\'sunshine\',)":0.7395530803,"(\'snow\',)":-0.0284692386,"(\'cloud\',)":-0.2494015037,"(\'cloud2\',)":-0.2485428769,"(\'mincloud\',)":0.0065765615,"(\'visiblt\',)":0.3432285658,"(\'grd_temprt\',)":0.2942399588}]'