In [1]:
from bs4 import BeautifulSoup
import requests
from datetime import datetime, timedelta
import re
import pandas as pd

# Snapshot of the current local time, taken once when this cell runs.
now = datetime.now()
# Today's date as 'YYYY.M.D' (month and day are not zero-padded).
today = '.'.join(map(str, (now.year, now.month, now.day)))

In [2]:


class KBOResultBot:
    """Look up one team's KBO box score on statiz.co.kr for a given date.

    The public methods never raise for "expected" outcomes; instead they
    return one of three sentinel strings (exposed as class constants):

    * ``INVALID_DATE_MESSAGE`` - the date passed to the constructor could
      not be parsed.
    * ``RAINOUT_MESSAGE`` - the team's box for that date contains the
      rain-cancellation marker.
    * ``NO_GAME_MESSAGE`` - no box score could be located for the team.

    Network failures (including the request timeout) propagate as
    ``requests`` exceptions.
    """

    # Sentinel strings returned to callers; the text is part of the
    # public contract and must not change.
    INVALID_DATE_MESSAGE = "날짜 형식을 인식할 수 없습니다."
    RAINOUT_MESSAGE = '우천취소'
    NO_GAME_MESSAGE = "경기가 없습니다."

    BOXSCORE_URL = 'http://www.statiz.co.kr/boxscore.php'
    # Seconds to wait for the server; without a timeout requests.get can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Accepted input layouts, 4-digit-year variants first, then 2-digit.
    DATE_FORMATS = (
        "%Y년 %m월 %d일", "%Y/%m/%d", "%Y-%m-%d", "%Y %m %d", "%Y.%m.%d",
        "%y년 %m월 %d일", "%y/%m/%d", "%y-%m-%d", "%y %m %d", "%y.%m.%d",
    )

    def __init__(self, date, team='롯데'):
        """Store the normalized date and the team name to search for.

        Args:
            date: A date string in one of ``DATE_FORMATS``, the literal
                '오늘' (today), or a ``date``/``datetime`` object.
            team: Team name as it appears on the box-score page.
        """
        self.date = self.convert_to_yyyy_mm_dd(date)
        self.team = team

    def convert_to_yyyy_mm_dd(self, date_str):
        """Normalize ``date_str`` to a 'YYYY-MM-DD' string.

        Args:
            date_str: '오늘', a string matching one of ``DATE_FORMATS``
                (surrounding whitespace is ignored), or any object with a
                ``strftime`` method such as ``date``/``datetime``.

        Returns:
            The date as 'YYYY-MM-DD', or ``INVALID_DATE_MESSAGE`` when the
            input cannot be interpreted.
        """
        # Accept date/datetime objects directly instead of crashing in strptime.
        if hasattr(date_str, "strftime"):
            return date_str.strftime("%Y-%m-%d")
        if not isinstance(date_str, str):
            return self.INVALID_DATE_MESSAGE

        date_str = date_str.strip()
        if date_str == '오늘':
            return datetime.now().strftime("%Y-%m-%d")

        for fmt in self.DATE_FORMATS:
            try:
                return datetime.strptime(date_str, fmt).strftime("%Y-%m-%d")
            except ValueError:
                continue  # not this layout; try the next one

        return self.INVALID_DATE_MESSAGE

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed with the lxml parser."""
        html = requests.get(url, timeout=self.REQUEST_TIMEOUT).text
        return BeautifulSoup(html, 'lxml')

    @staticmethod
    def _row_values(row):
        """Return the text of each <td> in ``row`` with <span> content removed."""
        values = []
        for td in row.find_all('td'):
            for span in td.find_all('span'):
                span.extract()  # drop the <span> so only the cell's own text remains
            values.append(td.get_text(strip=True))
        return values

    def extract_stadium(self):
        """Find the stadium of the team's game on ``self.date``.

        Returns:
            * ``INVALID_DATE_MESSAGE`` if the stored date is invalid
              (no request is made);
            * ``RAINOUT_MESSAGE`` if the team's box contains that marker;
            * the text inside the first parentheses of the box title, with
              spaces replaced by '+', for use in the stadium query
              parameter;
            * ``None`` if no box mentions the team or no title carries a
              parenthesized stadium.
        """
        if self.date == self.INVALID_DATE_MESSAGE:
            return self.INVALID_DATE_MESSAGE

        soup = self._fetch_soup(f'{self.BOXSCORE_URL}?date={self.date}')

        for box in soup.find_all("div", "box"):
            box_text = box.text
            if self.team not in box_text:
                continue
            if self.RAINOUT_MESSAGE in box_text:
                return self.RAINOUT_MESSAGE

            title = box.find("h3", "box-title")
            if title is None:
                continue  # box mentions the team but has no title to parse
            match = re.search(r'\((.*?)\)', title.text)
            if match:
                return match.group(1).replace(' ', '+')

        return None

    def table(self):
        """Return the team's line score as a DataFrame, or a sentinel string.

        Returns:
            A transposed ``DataFrame`` indexed by the header cells, with
            one column per team (taken from the '팀' column), or one of
            ``RAINOUT_MESSAGE`` / ``INVALID_DATE_MESSAGE`` /
            ``NO_GAME_MESSAGE``.
        """
        stadium = self.extract_stadium()
        if stadium in (self.RAINOUT_MESSAGE, self.INVALID_DATE_MESSAGE):
            return stadium
        if stadium is None:
            # No game found for the team: do not query "...&stadium=None".
            return self.NO_GAME_MESSAGE

        soup = self._fetch_soup(
            f"{self.BOXSCORE_URL}?date={self.date}&stadium={stadium}"
        )

        boxes = soup.find_all("div", "box")
        if len(boxes) < 3:
            return self.NO_GAME_MESSAGE

        # The third box is the one the score table is read from.
        score_box = boxes[2]
        rows = score_box.find_all('tr')
        if len(rows) < 3:
            # Header row plus two team rows are required.
            return self.NO_GAME_MESSAGE

        columns = [th.text for th in score_box.find_all('th')]
        team_rows = [self._row_values(row) for row in rows[1:3]]

        return pd.DataFrame(team_rows, columns=columns).set_index('팀').T

In [3]:
# '오늘' ("today") resolves to the current date; team defaults to '롯데'.
KBOResultBot('오늘').table()

'경기가 없습니다.'

In [4]:
# '23 6 8' matches the two-digit-year "%y %m %d" format, i.e. 2023-06-08.
KBOResultBot('23 6 8').table()

팀,KT,롯데
1,1,0
2,0,0
3,1,0
4,2,0
5,1,0
6,0,0
7,0,5
8,0,0
9,1,1
10,0,0


In [5]:
# '23 4 5' is parsed as 2023-04-05; table() returns the '우천취소' string
# when the team's box on that date contains that marker.
KBOResultBot('23 4 5').table()

'우천취소'