### 날씨 데이터 파싱
* [기상청날씨데이터](http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp)
* 파싱한 데이터를 dict, list 자료구조에 저장
* 자료구조의 데이터를 json 파일로 저장
* BeautifulSoup 의 find(), find_all(), select(), select_one() 함수 비교

In [1]:
!pip3 show lxml

Name: lxml
Version: 5.2.1
Summary: Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.
Home-page: https://lxml.de/
Author: lxml dev team
Author-email: lxml-dev@lxml.de
License: BSD-3-Clause
Location: C:\Users\r2com\anaconda3\Lib\site-packages
Requires: 
Required-by: parsel, Scrapy


In [1]:
import requests
from bs4 import BeautifulSoup

# Compare BeautifulSoup's four lookup helpers -- find(), find_all(),
# select_one() and select() -- against the KMA mid-term forecast RSS feed.
url = 'http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp'

# Pass an explicit timeout: without one, requests waits forever on a
# server that never answers.
res = requests.get(url, timeout=10)
print(res.status_code)

if res.ok:
    # The feed is XML, so ask BeautifulSoup for an XML parser.
    soup = BeautifulSoup(res.text, features='xml')

    # find() returns only the first matching Tag.
    title_tag = soup.find('title')
    print(type(title_tag), title_tag, title_tag.text)

    # find_all() returns every match as a ResultSet.
    title_all_tag = soup.find_all('title')
    print(type(title_all_tag), title_all_tag)

    # Attribute values in the document are strings, so filter on '3'
    # (the same value the CSS selectors below use).
    location_tag = soup.find('location', attrs={'wl_ver': '3'})
    print('==find() ', type(location_tag))

    # select_one() is the CSS-selector counterpart of find().
    location_tag2 = soup.select_one("location[wl_ver='3']")
    print('==select_one() ', type(location_tag2))

    # find_all() is the current name; findAll() is only a legacy alias.
    loc_tag_all = soup.find_all('location', attrs={'wl_ver': '3'})
    print('==find_all() ', type(loc_tag_all), len(loc_tag_all))

    # select() is the CSS-selector counterpart of find_all().
    loc_tag_all2 = soup.select("location[wl_ver='3']")
    print('==select() ', type(loc_tag_all2))

200
<class 'bs4.element.Tag'> <title>기상청 육상 중기예보</title> 기상청 육상 중기예보
<class 'bs4.element.ResultSet'> [<title>기상청 육상 중기예보</title>, <title>전국 육상 중기예보 - 2024년 08월 07일 (수)요일 06:00 발표</title>, <title>전국 육상중기예보</title>]
==find()  <class 'bs4.element.Tag'>
==select_one()  <class 'bs4.element.Tag'>
==find_all()  <class 'bs4.element.ResultSet'> 41
==select()  <class 'bs4.element.ResultSet'>


'''
<location wl_ver="3">
    <province>서울ㆍ인천ㆍ경기도</province>
    <city>서울</city>
    <data>
        <mode>A02</mode>
        <tmEf>2022-08-28 00:00</tmEf>
        <wf>맑음</wf>
        <tmn>18</tmn>
        <tmx>28</tmx>
        <reliability/>
        <rnSt>0</rnSt>
    </data>
    <data>
        <mode>A02</mode>
        <tmEf>2022-08-28 12:00</tmEf>
        <wf>구름많음</wf>
        <tmn>18</tmn>
        <tmx>28</tmx>
        <reliability/>
        <rnSt>30</rnSt>
    </data>
</location>
{
  "province":"서울ㆍ인천ㆍ경기도",
  "city":"서울",
  "datas":[{"mode":"A02","tmEf":"2022-08-28 00:00","wf":"맑음"},
            {"mode":"A02","tmEf":"2022-08-28 12:00","wf":"구름많음"},{},{}]
}
'''

### 서울(city)의 날씨 데이터 Parsing

In [7]:
import requests
from bs4 import BeautifulSoup

# Parse the first <location> element of the KMA mid-term forecast feed into
# a dict of the form {'province': ..., 'city': ..., 'datas': [{...}, ...]}.
url = 'http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp'

# Pass an explicit timeout: without one, requests waits forever on a
# server that never answers.
res = requests.get(url, timeout=10)
if res.ok:
    soup = BeautifulSoup(res.text, features='xml')

    # find() returns only the first matching <location>.
    location_tag = soup.find('location', attrs={'wl_ver': '3'})

    location_dict = {
        'province': location_tag.find('province').text,
        'city': location_tag.find('city').text,
    }

    # Each <data> child holds one forecast slot; only these child tags
    # are copied into the result.
    data_fields = ('mode', 'tmEf', 'wf', 'tmn', 'tmx')
    data_tags = location_tag.find_all('data')
    data_list = [
        {field: data_tag.find(field).text for field in data_fields}
        for data_tag in data_tags
    ]

    location_dict['datas'] = data_list
    print(location_dict)

{'province': '서울ㆍ인천ㆍ경기도', 'city': '서울', 'datas': [{'mode': 'A02', 'tmEf': '2024-08-10 00:00', 'wf': '구름많음', 'tmn': '26', 'tmx': '34'}, {'mode': 'A02', 'tmEf': '2024-08-10 12:00', 'wf': '구름많음', 'tmn': '26', 'tmx': '34'}, {'mode': 'A02', 'tmEf': '2024-08-11 00:00', 'wf': '구름많음', 'tmn': '26', 'tmx': '33'}, {'mode': 'A02', 'tmEf': '2024-08-11 12:00', 'wf': '구름많음', 'tmn': '26', 'tmx': '33'}, {'mode': 'A02', 'tmEf': '2024-08-12 00:00', 'wf': '맑음', 'tmn': '25', 'tmx': '32'}, {'mode': 'A02', 'tmEf': '2024-08-12 12:00', 'wf': '구름많음', 'tmn': '25', 'tmx': '32'}, {'mode': 'A02', 'tmEf': '2024-08-13 00:00', 'wf': '맑음', 'tmn': '26', 'tmx': '33'}, {'mode': 'A02', 'tmEf': '2024-08-13 12:00', 'wf': '구름많음', 'tmn': '26', 'tmx': '33'}, {'mode': 'A02', 'tmEf': '2024-08-14 00:00', 'wf': '맑음', 'tmn': '25', 'tmx': '34'}, {'mode': 'A02', 'tmEf': '2024-08-14 12:00', 'wf': '구름많음', 'tmn': '25', 'tmx': '34'}, {'mode': 'A01', 'tmEf': '2024-08-15 00:00', 'wf': '구름많음', 'tmn': '25', 'tmx': '34'}, {'mode': 'A01', 'tmEf

### 41개 City의 날씨 데이터 파싱

In [8]:
import requests
from bs4 import BeautifulSoup

# Parse every <location> element of the KMA mid-term forecast feed into
# location_list, one dict per city:
#   {'province': ..., 'city': ..., 'datas': [{'mode': ..., 'tmEf': ..., ...}]}
url = 'http://www.kma.go.kr/weather/forecast/mid-term-rss3.jsp'

# Bind the result list before the request so the summary print at the end
# never raises NameError when the request fails.
location_list = []

# Pass an explicit timeout: without one, requests waits forever on a
# server that never answers.
res = requests.get(url, timeout=10)
if res.ok:
    soup = BeautifulSoup(res.text, features="xml")

    # Child tags copied out of every <data> element.
    data_fields = ('mode', 'tmEf', 'wf', 'tmn', 'tmx')

    for location_tag in soup.find_all('location', attrs={'wl_ver': '3'}):
        location_list.append({
            'province': location_tag.find('province').text,
            'city': location_tag.find('city').text,
            'datas': [
                {field: data_tag.find(field).text for field in data_fields}
                for data_tag in location_tag.find_all('data')
            ],
        })

print(len(location_list))

41


In [9]:
import warnings

# Install a blanket "ignore" filter: every warning category is silenced
# from this point on.
warnings.simplefilter('ignore')

#### weather.json 파일로 저장하기

In [10]:
import json
import os

# Make sure the output directory exists so open() does not fail on a
# fresh checkout.
os.makedirs('data', exist_ok=True)

with open('data/weather.json', 'w', encoding='utf-8') as file:
    # ensure_ascii=False writes the Korean text as real UTF-8 characters
    # instead of \uXXXX escapes; indent keeps the file human-readable.
    json.dump(location_list, file, ensure_ascii=False, indent=2)

#### weather.json 파일을 읽어오기

In [11]:
import json

# Read the saved forecast back into json_data. Mode 'r' is open()'s
# default; it is spelled out here only for clarity.
with open('data/weather.json', mode='r', encoding='utf-8') as json_file:
    json_data = json.loads(json_file.read())

#print(json_data)