## 벅스 뮤직 탑 100 크롤링 후 저장

In [1]:
import requests
from bs4 import BeautifulSoup  
import pandas as pd

# Bugs real-time chart page.
url = 'https://music.bugs.co.kr/chart'

res = requests.get(url)

soup = BeautifulSoup(res.content, 'html.parser')

# One <tr> per chart entry inside the real-time chart table.
datas = soup.select('#CHARTrealtime > table > tbody > tr')

# Collect every row first and build the DataFrame once; enlarging a frame
# one row at a time with bugs_df.loc[idx] = ... gets slower as it grows.
rows = []
for data in datas:
    rank = data.select_one('div.ranking > strong').text
    title = data.select_one('p.title > a').text
    artist = data.select_one('p.artist > a').text

    rows.append([rank, title, artist])

bugs_df = pd.DataFrame(rows, columns=['순위','제목','가수'])

# Use the rank column as the index.
bugs_df = bugs_df.set_index('순위')
bugs_df.head()

# bugs_df.to_csv("./data/벅스탑100크롤링.csv",encoding='cp949')

Unnamed: 0_level_0,제목,가수
순위,Unnamed: 1_level_1,Unnamed: 2_level_1
1,REBEL HEART,IVE (아이브)
2,Whiplash,aespa
3,toxic till the end,로제(ROSÉ)
4,"HOME SWEET HOME (feat. 태양, 대성)",G-DRAGON
5,Drowning,WOODZ


## 벅스 뮤직 날짜별 순위

### URL 살펴보기
* 자동화를 위해서는 URL에 패턴이 필요하다.

In [None]:
# https://music.bugs.co.kr/chart/track/day/total?chartdate=20250101
# https://music.bugs.co.kr/chart/track/day/total?chartdate=20250110
# https://music.bugs.co.kr/chart/track/day/total?chartdate=20250119
# https://music.bugs.co.kr/chart/track/day/total?chartdate=20250124

# 결론:
# 기본 URL : https://music.bugs.co.kr/chart/track/day/total?chartdate=
# 가공할 값 : 20250124 (형식:yyyymmdd)

### 날짜 가공하는 법

* 파이썬 코드 버전
* 현재 날짜로부터 30일 전까지의 모든 날짜를 출력

In [2]:
from datetime import datetime, timedelta

# Step size used when walking through the date range: one day.
delta = timedelta(days=1)

# The range ends "now" and starts 30 one-day steps earlier.
end_date = datetime.now()
start_date = end_date - 30 * delta

In [4]:
# Show the range boundaries and the step, one value per line.
print(end_date, start_date, delta, sep='\n')

2025-01-24 01:21:30.406318
2024-12-25 01:21:30.406318
1 day, 0:00:00


In [6]:
# Walk from start_date up to and including end_date, one `delta` at a time,
# and record each visited day as a 'yyyymmdd' string.
dateStr_lst = []
current_date = start_date
while not (current_date > end_date):
    dstr = current_date.strftime('%Y%m%d')
    dateStr_lst.append(dstr)
    current_date = current_date + delta

In [7]:
# Preview the first ten generated date strings.
dateStr_lst[:10]

['20241225',
 '20241226',
 '20241227',
 '20241228',
 '20241229',
 '20241230',
 '20241231',
 '20250101',
 '20250102',
 '20250103']

In [8]:
# Wrap the date-string generation in a function.
# Function name: getDateStrList_From_Now
# Parameters: periods (defaults to 30 days), end_date (defaults to now)

def getDateStrList_From_Now(periods=30, end_date=None):
    """Return consecutive days as 'yyyymmdd' strings, oldest first.

    The list starts `periods` days before `end_date` and includes both
    endpoints, so a whole-number `periods` yields `periods + 1` strings.

    Args:
        periods: Number of days to go back from `end_date`. A negative
            value produces an empty list.
        end_date: Last day of the range (a datetime). When omitted, the
            current local time is used; passing it explicitly makes the
            result reproducible.

    Returns:
        list[str]: Date strings formatted with '%Y%m%d'.
    """
    from datetime import datetime, timedelta

    if end_date is None:
        end_date = datetime.now()
    start_date = end_date - timedelta(days=periods)
    delta = timedelta(days=1)

    current_date = start_date
    dateStr_lst = []
    while current_date <= end_date:
        dateStr_lst.append(current_date.strftime("%Y%m%d"))
        current_date += delta
    return dateStr_lst

In [15]:
# Spot-check the first three date strings returned for the default period.
getDateStrList_From_Now()[:3]

['20241225', '20241226', '20241227']

In [17]:
# Both the start and end day are included, so the default 30-day period gives 31 entries.
len(getDateStrList_From_Now())

31

### URL 만들기

In [18]:
# Daily chart URL without the date; a 'yyyymmdd' value is appended to it.
basic_url = "https://music.bugs.co.kr/chart/track/day/total?chartdate="

# Look back three days from today.
periods = 3
range_date = getDateStrList_From_Now(periods)

# Print the chart URL for every day in the range.
for rdate in range_date:
    url = f"{basic_url}{rdate}"
    print(url)

https://music.bugs.co.kr/chart/track/day/total?chartdate=20250121
https://music.bugs.co.kr/chart/track/day/total?chartdate=20250122
https://music.bugs.co.kr/chart/track/day/total?chartdate=20250123
https://music.bugs.co.kr/chart/track/day/total?chartdate=20250124


### 오늘날짜 순위 크롤링

In [37]:
import requests
import pandas as pd
from bs4 import BeautifulSoup 

# Browser-like User-Agent sent with the request.
head = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'}

# Single source of truth for the chart date: it feeds both the URL query
# (yyyymmdd) and the '날짜' column (yyyy-mm-dd), so the two cannot drift apart.
chart_date = '20250124'
url = 'https://music.bugs.co.kr/chart/track/day/total?chartdate=' + chart_date
formDate = f"{chart_date[:4]}-{chart_date[4:6]}-{chart_date[6:]}"

res = requests.get(url, headers=head)
soup = BeautifulSoup(res.content, 'html.parser')

# One <tr> per chart entry inside the daily chart table.
datas = soup.select('#CHARTday > table > tbody > tr')

# Collect every row first and build the DataFrame once instead of enlarging
# it row by row with bugs_df.loc[idx] = ...
rows = []
for data in datas:
    rank = data.select_one('div.ranking > strong').text
    title = data.select_one('p.title > a').text
    artist = data.select_one('p.artist > a').text

    rows.append([formDate, rank, title, artist])

bugs_df = pd.DataFrame(rows, columns=['날짜','순위','제목','가수'])

# Use the date column as the index.
bugs_df = bugs_df.set_index('날짜')
bugs_df.head()

# bugs_df.to_csv("./data/벅스탑100크롤링_날짜.csv")

Unnamed: 0_level_0,순위,제목,가수
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-01-24,1,REBEL HEART,IVE (아이브)
2025-01-24,2,Whiplash,aespa
2025-01-24,3,toxic till the end,로제(ROSÉ)
2025-01-24,4,"HOME SWEET HOME (feat. 태양, 대성)",G-DRAGON
2025-01-24,5,APT.,로제(ROSÉ)


### 자동 크롤링

In [51]:
# for i in range(3):
#     for j in range(100):
#         print(i*100 + j)

In [52]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
from datetime import datetime

# Browser-like User-Agent sent with every request.
head = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'}

# Number of days to look back from today (the range includes today).
periods = 100

basic_url = "https://music.bugs.co.kr/chart/track/day/total?chartdate="

range_date = getDateStrList_From_Now(periods)

# Rows are accumulated in a plain list and turned into a DataFrame once at
# the end. The previous bugs_df.loc[ncount*100 + idx] = ... assumed at most
# 100 entries per page (more would overwrite the next day's rows) and
# enlarged the frame one row at a time.
records = []

for rdate in range_date:
    url = basic_url + rdate
    print(url)

    res = requests.get(url, headers=head)
    soup = BeautifulSoup(res.content, 'html.parser')
    datas = soup.select('#CHARTday > table > tbody > tr')

    # The display date is the same for every row of this page, so compute it once.
    formDate = datetime.strptime(rdate,'%Y%m%d').strftime('%Y-%m-%d')

    for data in datas:
        rank = data.select_one('div.ranking > strong').text
        title = data.select_one('p.title > a').text
        artist = data.select_one('p.artist > a').text

        records.append([formDate, rank, title, artist])

bugs_df = pd.DataFrame(records, columns=['날짜','순위','제목','가수'])

# Use the date column as the index.
bugs_df = bugs_df.set_index('날짜')

bugs_df.head()

https://music.bugs.co.kr/chart/track/day/total?chartdate=20241016
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241017
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241018
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241019
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241020
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241021
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241022
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241023
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241024
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241025
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241026
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241027
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241028
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241029
https://music.bugs.co.kr/chart/track/day/total?chartdate=20241030
https://mu

Unnamed: 0_level_0,순위,제목,가수
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-10-16,1,Mantra,제니 (JENNIE)
2024-10-16,2,UP (KARINA Solo),aespa
2024-10-16,3,"LOVE, MONEY, FAME (feat. DJ Khaled)",세븐틴(SEVENTEEN)
2024-10-16,4,내 이름 맑음,QWER
2024-10-16,5,HAPPY,DAY6 (데이식스)


In [53]:
# Make sure the output folder exists first: to_csv does not create missing
# directories and would fail on a fresh checkout without ./data.
import os

os.makedirs("./data", exist_ok=True)

# The file name records the first and last crawled dates.
bugs_df.to_csv(f"./data/벅스탑100크롤링_{range_date[0]}_{range_date[-1]}.csv")

In [54]:
# Print a summary (index, columns, non-null counts, dtypes) of the crawled frame.
bugs_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10100 entries, 2024-10-16 to 2025-01-24
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   순위      10100 non-null  object
 1   제목      10100 non-null  object
 2   가수      10100 non-null  object
dtypes: object(3)
memory usage: 315.6+ KB


In [55]:
# Number of dates in the crawl range, i.e. how many chart pages were requested.
len(range_date)

101