In [1]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import re

def generate_urls(type: str, offset: int=None):
    assert type in ['manga', 'anime'], 'Please provide `type` of either `manga` or `anime`'
    _url = f'https://myanimelist.net/top{type}.php'

    if offset is not None:
        assert offset % 50 == 0, 'Please provide offset in multiples of 50'
        _url = _url + f'?limit={offset}'

    res = requests.get(_url)
    soup = BeautifulSoup(res.text, 'html.parser')

    if type == 'anime':
        titles = soup.find_all('h3', class_='anime_ranking_h3')
        dates = soup.find_all('div', class_='information di-ib mt4')
        scores = soup.find_all('div', class_='js-top-ranking-score-col di-ib al')
        ranks = soup.find_all('span', class_='rank')
        member_divs = soup.find_all('div', class_='information di-ib mt4')
    else:
        titles = soup.find_all('h3', class_='manga_h3')
        dates = soup.find_all('div', class_='information di-ib mt4')
        scores = soup.find_all('div', class_ ='js-top-ranking-score-col di-ib al' )
        ranks = soup.find_all('span', class_ = 'rank')
        member_divs = soup.find_all('div', class_='information di-ib mt4')

    results = []
    for title, date, score, rank, member_div in zip(titles, dates, scores, ranks, member_divs):
        _title = title.a.contents
        _url = title.a.get('href')
        date_text = date.contents[2].strip()
        # Extract the first and last year from the date range if present
        years = re.findall(r'\b\d{4}\b', date_text)
        if len(years) == 2:
            start_date = datetime.strptime(years[0], '%Y').date()
            end_date = datetime.strptime(years[1], '%Y').date()
            _date = f"{start_date.strftime('%Y-%m')} - {end_date.strftime('%Y-%m')}"
        elif len(years) == 1:
            _date = datetime.strptime(years[0], '%Y').date().strftime('%Y-%m')
        else:
            _date = None

        _score = float(score.text.strip()) if score else None

        _rank = int(rank.text.strip()) if rank else None

        _members = None
        if member_div:
            member_text = member_div.get_text(strip=True)
            member_count_match = re.search(r'(\d{1,3}(?:,\d{3})*)(?=\s+members)', member_text)
            if member_count_match:
                # Extract the digits and remove any commas
                _members = int(member_count_match.group(1).replace(',', ''))

        results.append({
            'title': _title,
            'url': _url,
            'date_published': _date,
            'score': _score,
            'rank': _rank,
            'members': _members
        })

    continuation_token = soup.find_all('a', class_='next')
    if continuation_token:
        if offset is None:
            offset = 50
        else:
            offset = offset + 50
    else:
        offset = None

    return offset, results
offset, results = generate_urls('anime', offset=None)
for result in results:
    print(result)


{'title': ['Sousou no Frieren'], 'url': 'https://myanimelist.net/anime/52991/Sousou_no_Frieren', 'date_published': '2023-01 - 2024-01', 'score': 9.38, 'rank': 1, 'members': 707752}
{'title': ['Fullmetal Alchemist: Brotherhood'], 'url': 'https://myanimelist.net/anime/5114/Fullmetal_Alchemist__Brotherhood', 'date_published': '2009-01 - 2010-01', 'score': 9.09, 'rank': 2, 'members': 103340920}
{'title': ['Steins;Gate'], 'url': 'https://myanimelist.net/anime/9253/Steins_Gate', 'date_published': '2011-01 - 2011-01', 'score': 9.07, 'rank': 3, 'members': 112560576}
{'title': ['Gintama°'], 'url': 'https://myanimelist.net/anime/28977/Gintama°', 'date_published': '2015-01 - 2016-01', 'score': 9.06, 'rank': 4, 'members': 630004}
{'title': ['Shingeki no Kyojin Season 3 Part 2'], 'url': 'https://myanimelist.net/anime/38524/Shingeki_no_Kyojin_Season_3_Part_2', 'date_published': '2019-01 - 2019-01', 'score': 9.05, 'rank': 5, 'members': 192271998}
{'title': ['Gintama: The Final'], 'url': 'https://myan

In [2]:
manga = []
offset = None
type = 'manga'
manga_count = 0

while manga_count < 100:  # Download only 100 manga
    offset, _res = generate_urls(type=type, offset=offset)
    manga += _res
    print(f'{offset} {type} downloaded')
    manga_count += len(_res)

50 manga downloaded
100 manga downloaded
150 manga downloaded
200 manga downloaded
250 manga downloaded
300 manga downloaded
350 manga downloaded


In [3]:
manga[1]['url']

'https://myanimelist.net/manga/1706/JoJo_no_Kimyou_na_Bouken_Part_7__Steel_Ball_Run'

In [4]:
manga[0]['date_published']

'1989-01'

In [5]:
anime = []
offset = None
type = 'anime'
anime_count = 0

while anime_count < 100:
    offset, _res = generate_urls(type=type, offset=offset)
    anime += _res
    print(f'{offset} {type} downloaded')
    anime_count += len(_res)

50 anime downloaded
100 anime downloaded
150 anime downloaded
200 anime downloaded
250 anime downloaded
300 anime downloaded
350 anime downloaded


In [6]:
anime[0]['url']

'https://myanimelist.net/anime/52991/Sousou_no_Frieren'

In [7]:
anime[0]['date_published']

'2023-01 - 2024-01'

In [8]:
manga[0].keys()

dict_keys(['title', 'url', 'date_published', 'score', 'rank', 'members'])

In [9]:
anime[0].keys()

dict_keys(['title', 'url', 'date_published', 'score', 'rank', 'members'])

In [10]:
import pandas as pd
pd.DataFrame(manga)

Unnamed: 0,title,url,date_published,score,rank,members
0,[Berserk],https://myanimelist.net/manga/2/Berserk,1989-01,9.47,1,691452
1,[JoJo no Kimyou na Bouken Part 7: Steel Ball Run],https://myanimelist.net/manga/1706/JoJo_no_Kim...,2004-01 - 2011-01,9.30,2,265990
2,[Vagabond],https://myanimelist.net/manga/656/Vagabond,1998-01 - 2015-01,9.25,3,382889
3,[One Piece],https://myanimelist.net/manga/13/One_Piece,1997-01,9.22,4,618736
4,[Monster],https://myanimelist.net/manga/1/Monster,1994-01 - 2001-01,9.16,5,245907
...,...,...,...,...,...,...
100,[Sunny],https://myanimelist.net/manga/23571/Sunny,2010-01 - 2015-01,8.29,1,520203
101,[Orange],https://myanimelist.net/manga/35573/Orange,2012-01 - 2022-01,8.30,2,157273
102,[Sword Art Online: Progressive],https://myanimelist.net/manga/43921/Sword_Art_...,2012-01,8.30,3,22204
103,[Sakamichi no Apollon: Bonus Track],https://myanimelist.net/manga/47022/Sakamichi_...,2012-01 - 2012-01,8.30,4,126401


In [11]:
pd.DataFrame(anime)

Unnamed: 0,title,url,date_published,score,rank,members
0,[Sousou no Frieren],https://myanimelist.net/anime/52991/Sousou_no_...,2023-01 - 2024-01,9.38,1,707752
1,[Fullmetal Alchemist: Brotherhood],https://myanimelist.net/anime/5114/Fullmetal_A...,2009-01 - 2010-01,9.09,2,103340920
2,[Steins;Gate],https://myanimelist.net/anime/9253/Steins_Gate,2011-01 - 2011-01,9.07,3,112560576
3,[Gintama°],https://myanimelist.net/anime/28977/Gintama°,2015-01 - 2016-01,9.06,4,630004
4,[Shingeki no Kyojin Season 3 Part 2],https://myanimelist.net/anime/38524/Shingeki_n...,2019-01 - 2019-01,9.05,5,192271998
...,...,...,...,...,...,...
100,[Kobayashi-san Chi no Maid Dragon S],https://myanimelist.net/anime/39247/Kobayashi-...,2021-01 - 2021-01,8.26,1,554153
101,[Kono Subarashii Sekai ni Shukufuku wo! 2],https://myanimelist.net/anime/32937/Kono_Subar...,2017-01 - 2017-01,8.26,2,171511729
102,[Nodame Cantabile],https://myanimelist.net/anime/1698/Nodame_Cant...,2007-01 - 2007-01,8.26,3,315048
103,[Planetes],https://myanimelist.net/anime/329/Planetes,2003-01 - 2004-01,8.26,4,253975


In [12]:
anime_df = pd.DataFrame(anime)
manga_df = pd.DataFrame(manga)

In [13]:
anime_df.to_csv('anime_df.csv')
manga_df.to_csv('manga_df.csv')