### 기본 세팅

In [1]:
import time
from datetime import date, timedelta
import pandas as pd

from crawling_daumnews import get_daumnews_info
from crawling_dcinside import get_dcinside_info
from crawling_jongto import get_jongto_info
from crawling_navernews import get_navernews_info
from crawling_youtube import get_youtube_info

In [2]:
# Yesterday's date, used as the filename prefix of every CSV written below.
def get_yesterday():
    """Return yesterday's local date as a zero-padded ``YYYYMMDD`` string.

    Returns:
        str: e.g. ``'20240105'`` when today is 2024-01-06.
    """
    # strftime zero-pads month and day, replacing the manual '0' + str(...) logic.
    return (date.today() - timedelta(days=1)).strftime('%Y%m%d')


yesterday = get_yesterday()
# print(' crawl date: {}'.format(yesterday))

### 종목 선정

In [3]:
# Stock vocabulary: each inner list holds a stock name first, followed by
# its synonyms/nicknames (if any) that are also used as search keywords.
vocab = [
    ['삼성전자', '삼전'],
    ['SK하이닉스', '하닉'],
    ['카카오뱅크', '카뱅'],
    ['두산중공업', '두중'],
    ['HMM', '흠'],
    ['SK바이오사이언스', 'sk바이오', 'sk바사'],
    ['한국전력공사', '한국전력', '한전'],
    ['카카오'],
    ['대한항공', '댄공']
]

### 전체 크롤링

In [4]:
# Daum News crawl: run the crawler for every stock name and synonym in vocab,
# keep the results as notebook globals, and dump them to date-prefixed CSVs.
start_time = time.time()

for synonyms in vocab:
    for keyword in synonyms:
        # get_daumnews_info returns an (info, comment) pair; both expose
        # .to_csv (presumably pandas DataFrames -- confirm in crawling_daumnews).
        info_df, comment_df = get_daumnews_info(keyword)

        # Publish under per-keyword global names so other cells can reach them.
        globals()[f'daumnews_{keyword}_info'] = info_df
        globals()[f'daumnews_{keyword}_comment'] = comment_df

        # Save both tables to CSV.
        info_df.to_csv(f'{yesterday}_daumnews_{keyword}_info.csv')
        comment_df.to_csv(f'{yesterday}_daumnews_{keyword}_comment.csv')

# Uncomment to report the elapsed time:
#print(' daumnews crawling time: {}'.format(time.time()-start_time))

In [5]:
# DCInside crawl: run the crawler for every stock name and synonym in vocab,
# keep the results as notebook globals, and dump them to date-prefixed CSVs.
start_time = time.time()

for synonyms in vocab:
    for keyword in synonyms:
        # The crawler hands back an (info, comment) pair; both expose .to_csv
        # (presumably pandas DataFrames -- confirm in crawling_dcinside).
        info_df, comment_df = get_dcinside_info(keyword)

        # Publish under per-keyword global names so other cells can reach them.
        globals()[f'dcinside_{keyword}_info'] = info_df
        globals()[f'dcinside_{keyword}_comment'] = comment_df

        # Save both tables to CSV.
        info_df.to_csv(f'{yesterday}_dcinside_{keyword}_info.csv')
        comment_df.to_csv(f'{yesterday}_dcinside_{keyword}_comment.csv')

# Uncomment to report the elapsed time:
#print(' dcinside crawling time: {}'.format(time.time()-start_time))

In [6]:
# Jongto (stock discussion board) crawl: for each stock, query with the pair
# [first name in the entry, current name/synonym], keep the results as
# notebook globals, and dump them to date-prefixed CSVs.
start_time = time.time()

for synonyms in vocab:
    for keyword1 in synonyms:
        # The first entry of each vocab row is always sent along with the alias.
        keyword0 = synonyms[0]
        keyword = [keyword0, keyword1]

        # The crawler hands back an (info, comment) pair; both expose .to_csv
        # (presumably pandas DataFrames -- confirm in crawling_jongto).
        info_df, comment_df = get_jongto_info(keyword)

        # Publish under per-pair global names so other cells can reach them.
        globals()[f'jongto_{keyword0}_{keyword1}_info'] = info_df
        globals()[f'jongto_{keyword0}_{keyword1}_comment'] = comment_df

        # Save both tables to CSV.
        info_df.to_csv(f'{yesterday}_jongto_{keyword0}_{keyword1}_info.csv')
        comment_df.to_csv(f'{yesterday}_jongto_{keyword0}_{keyword1}_comment.csv')

# Uncomment to report the elapsed time:
#print(' jongto crawling time: {}'.format(time.time()-start_time))

In [7]:
# Naver News crawl: run the crawler for every stock name and synonym in vocab,
# keep the results as notebook globals, and dump them to date-prefixed CSVs.
start_time = time.time()

for synonyms in vocab:
    for keyword in synonyms:
        # The crawler hands back an (info, comment) pair; both expose .to_csv
        # (presumably pandas DataFrames -- confirm in crawling_navernews).
        info_df, comment_df = get_navernews_info(keyword)

        # Publish under per-keyword global names so other cells can reach them.
        globals()[f'navernews_{keyword}_info'] = info_df
        globals()[f'navernews_{keyword}_comment'] = comment_df

        # Save both tables to CSV.
        info_df.to_csv(f'{yesterday}_navernews_{keyword}_info.csv')
        comment_df.to_csv(f'{yesterday}_navernews_{keyword}_comment.csv')

# Uncomment to report the elapsed time:
#print(' navernews crawling time: {}'.format(time.time()-start_time))

In [8]:
# YouTube crawl: only the first name of each vocab entry is searched (synonyms
# are skipped); results are kept as notebook globals and dumped to CSVs.
start_time = time.time()

for synonyms in vocab:
    primary = synonyms[0]
    # '한국전력공사' is searched under the shorter name '한국전력' instead.
    keyword = '한국전력' if primary == '한국전력공사' else primary

    # The crawler hands back an (info, comment) pair; both expose .to_csv
    # (presumably pandas DataFrames -- confirm in crawling_youtube).
    info_df, comment_df = get_youtube_info(keyword)

    # Publish under per-keyword global names so other cells can reach them.
    globals()[f'youtube_{keyword}_info'] = info_df
    globals()[f'youtube_{keyword}_comment'] = comment_df

    # Save both tables to CSV.
    info_df.to_csv(f'{yesterday}_youtube_{keyword}_info.csv')
    comment_df.to_csv(f'{yesterday}_youtube_{keyword}_comment.csv')

# Uncomment to report the elapsed time:
#print(' youtube crawling time: {}'.format(time.time()-start_time))