# 날씨 데이터 수집(기온&강수량)

- https://data.kma.go.kr/stcs/grnd/grndTaList.do?pgmNo=70 : 기온데이터
- https://data.kma.go.kr/stcs/grnd/grndRnList.do?pgmNo=69 : 강수량 데이터

In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib.font_manager as fm
font_name = fm.FontProperties(fname="C:\Windows\Fonts\\malgun.ttf").get_name()
plt.rc("font", family=font_name)

import matplotlib as mlp
mlp.rcParams["axes.unicode_minus"] = False

from datetime import datetime

### 기온 데이터 전처리

In [53]:
temp = pd.read_csv("temp.csv", encoding="euc-kr", skiprows=7)

In [54]:
temp.head()

Unnamed: 0,날짜,지점,평균기온(℃),최저기온(℃),최고기온(℃)
0,\t2003-12-01,전국,7.1,2.9,13.6
1,\t2003-12-02,전국,6.6,2.6,12.0
2,\t2003-12-03,전국,4.7,-0.4,11.2
3,\t2003-12-04,전국,3.2,-1.8,9.9
4,\t2003-12-05,전국,6.7,1.0,11.4


In [55]:
## 날짜 앞 \t 없애기 ##
for i in range(len(temp["날짜"])):
    temp["날짜"][i] = temp["날짜"][i][1:]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp["날짜"][i] = temp["날짜"][i][1:]


In [56]:
del temp["지점"]

In [57]:
temp.head()

Unnamed: 0,날짜,평균기온(℃),최저기온(℃),최고기온(℃)
0,2003-12-01,7.1,2.9,13.6
1,2003-12-02,6.6,2.6,12.0
2,2003-12-03,4.7,-0.4,11.2
3,2003-12-04,3.2,-1.8,9.9
4,2003-12-05,6.7,1.0,11.4


In [59]:
temp.columns = ['날짜', '평균기온', '최저기온', '최고기온']

In [60]:
temp.head()

Unnamed: 0,날짜,평균기온,최저기온,최고기온
0,2003-12-01,7.1,2.9,13.6
1,2003-12-02,6.6,2.6,12.0
2,2003-12-03,4.7,-0.4,11.2
3,2003-12-04,3.2,-1.8,9.9
4,2003-12-05,6.7,1.0,11.4


In [74]:
temp.to_csv("temp.csv", index=False)

### 강수량 데이터 전처리

In [65]:
rain = pd.read_csv("rain.csv", encoding="euc-kr", skiprows=7)

In [66]:
rain.head()

Unnamed: 0,날짜,지점,강수량(mm)
0,2003-12-01,전국,0.0
1,2003-12-02,전국,0.0
2,2003-12-03,전국,0.0
3,2003-12-04,전국,0.0
4,2003-12-05,전국,4.0


In [67]:
del rain["지점"]

In [68]:
rain.columns = ['날짜', '강수량']

In [69]:
rain.head()

Unnamed: 0,날짜,강수량
0,2003-12-01,0.0
1,2003-12-02,0.0
2,2003-12-03,0.0
3,2003-12-04,0.0
4,2003-12-05,4.0


In [75]:
rain.to_csv("rain.csv", index=False)

### 기온 데이터와 강수량 데이터 병합

In [63]:
temp["날짜"] = temp["날짜"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
rain["날짜"] = rain["날짜"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))

In [71]:
weather = pd.merge(temp, rain, how="left")

In [72]:
weather.head()

Unnamed: 0,날짜,평균기온,최저기온,최고기온,강수량
0,2003-12-01,7.1,2.9,13.6,0.0
1,2003-12-02,6.6,2.6,12.0,0.0
2,2003-12-03,4.7,-0.4,11.2,0.0
3,2003-12-04,3.2,-1.8,9.9,0.0
4,2003-12-05,6.7,1.0,11.4,4.0


In [76]:
weather.to_csv("weather.csv", index=False)