In [1]:
import pandas as pd
import requests
import bs4
from decimal import Decimal

In [2]:
# Fetch the SteamSpy homepage. Without a timeout a stalled connection would
# block this cell forever, so cap the wait (seconds).
response = requests.get('https://steamspy.com/', timeout=30)
# Decode response.text as UTF-8 regardless of what requests would guess
response.encoding ='utf-8'

In [3]:
response.status_code

200

In [4]:
response.headers

{'Date': 'Tue, 07 Nov 2023 22:58:02 GMT', 'Content-Type': 'text/html; charset=UTF-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'vary': 'Accept-Encoding', 'set-cookie': 'PHPSESSID=ga8ra3s98s8i6oe7e3okcosf0o; path=/', 'expires': 'Thu, 19 Nov 1981 08:52:00 GMT', 'Cache-Control': 'no-store, no-cache, must-revalidate', 'pragma': 'no-cache', 'x-frame-options': 'SAMEORIGIN', 'CF-Cache-Status': 'DYNAMIC', 'Report-To': '{"endpoints":[{"url":"https:\\/\\/a.nel.cloudflare.com\\/report\\/v3?s=Eh38ZV3fqS03lYCSlRcMxnovgAlZkBkbCwXm6m2gBtgnqF0%2FGnj9W05l7wsQx%2FT%2F2QBney7%2FuhVkBxE3rNPz8vYv%2Bdwf3AJCOFB3DoFHlGHgHHGW0iK7JT5%2FoCOcr8xge8c5PCGfNVAB%2B5I%3D"}],"group":"cf-nel","max_age":604800}', 'NEL': '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}', 'Server': 'cloudflare', 'CF-RAY': '82292db828dd6509-GIG', 'Content-Encoding': 'br', 'alt-svc': 'h3=":443"; ma=86400'}

In [5]:
# Parsing document response using BeautifulSoup class
soup = bs4.BeautifulSoup(response.text, 'html.parser')

print(soup.title.text)

SteamSpy - All the data and stats about Steam games


In [6]:
# Checking the html document to know where to get the data from
soup

<!DOCTYPE html>

<html>
<head>
<meta content="text/html;charset=utf-8" http-equiv="content-type"/>
<meta charset="utf-8"/>
<title>SteamSpy - All the data and stats about Steam games</title>
<meta content="Games sales" property="og:title"/>
<meta content="SteamSpy - All the data about Steam games" property="og:site_name"/>
<meta content="Steam Spy automatically gathers data from Steam user profiles, analyzes it and presents in simple, yet beautiful, manner. Steam Spy is designed to be helpful for indie developers, journalists, students and all parties interested in PC gaming and its current state of affairs." property="og:description"/>
<meta content="en_US" property="og:locale"/>
<meta content="https://www.facebook.com/galyonkin" property="article:author"/>
<meta content="https://steamspy.com/assets/img/mascot.png" property="og:image"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport"/>
<link href="/pages/ico/60.png" rel="apple-

In [8]:
# Parent container from which we'll get the data
result_set = soup.select('tbody')

The homepage shows 3 tables of games with almost the same columns (except for the first table).

The first group contains the games trending on Steam. The second group shows games recently released on Steam. And the third group is under the title "Top 100 games in the last 2 weeks by total time spent", although the table has 384 rows (games).  

I'll start by making a table for each group.

In [20]:
# Rows (one <tr> per game) of the table at index 1, i.e. the SECOND table on the page
# (index 0 is the first table; see the set_1 / set_2 / set_3 assignment further down)
result_set[1].select('tr')

[<tr>
 <td>1</td>
 <td data-order="Tintin Reporter - Cigars of the Pharaoh"><a href="/app/2125090"><img class="img-ss-list" src="https://steamcdn-a.akamaihd.net/steam/apps/2125090/capsule_184x69.jpg"/> Tintin Reporter - Cigars of the Pharaoh</a></td>
 <td class="treleasedate" data-order="2023-11-07">Nov 7, 2023</td>
 <td class="tprice" data-order="3599">$35.99</td><td class="tuserscore" data-order="0">N/A (N/A)</td><td data-order="0">0 .. 20,000</td><td class="tplaytime" data-order="0">00:00 (00:00)</td></tr>,
 <tr>
 <td>2</td>
 <td data-order="Hans"><a href="/app/2616420"><img class="img-ss-list" src="https://steamcdn-a.akamaihd.net/steam/apps/2616420/capsule_184x69.jpg"/> Hans</a></td>
 <td class="treleasedate" data-order="2023-11-06">Nov 6, 2023</td>
 <td class="tprice" data-order="449">$4.49</td><td class="tuserscore" data-order="0">N/A (N/A)</td><td data-order="0">0 .. 20,000</td><td class="tplaytime" data-order="0">00:00 (00:00)</td></tr>,
 <tr>
 <td>3</td>
 <td data-order="Cybor

The contents for the first set are like this:  
1. Index
2. Title, image and link to a detailed view of the game (including game id).
3. Release date
4. Price
5. Score rank
6. Owners

In [315]:
set_1 = result_set[0].select('tr')
set_2 = result_set[1].select('tr')
set_3 = result_set[2].select('tr')

In [263]:
# Structure of each container
set_1[0].select('td')

[<td>1</td>,
 <td data-order="Evil West"><a href="/app/1065310"><img class="img-ss-list" src="https://steamcdn-a.akamaihd.net/steam/apps/1065310/capsule_184x69.jpg"/> Evil West</a></td>,
 <td class="" data-order="2022-11-21">Nov 21, 2022</td>,
 <td data-order="4999">$49.99</td>,
 <td class="t768" data-order="0">N/A (N/A)</td>,
 <td class="t1024" data-order="0">0 .. 20,000</td>]

In [305]:
# One list per future dataframe column; the next cell zips them into dataframe_1
index, title, game_id, game_url, img_url = [], [], [], [], []
release_date, price, score_rank, owners = [], [], [], []

# Each <tr> of the first table holds six <td> cells:
# rank, title (with link + image), release date, price, score rank, owners
for row in set_1:
    cells = row.select('td')
    name_cell = cells[1]

    index.append(int(cells[0].text))
    title.append(name_cell['data-order'])

    href = name_cell.select('a')[0]['href']  # e.g. "/app/1065310"
    game_id.append(href.split('/')[2])
    game_url.append('https://steamspy.com' + href)

    img_url.append(name_cell.select('img')[0]['src'])
    release_date.append(cells[2]['data-order'])
    price.append(cells[3].text.replace('$', ''))
    score_rank.append(cells[4].text)
    owners.append(cells[5].text)

In [311]:
# Assemble the column lists of set 1 into a dataframe (one row per game)
columns_1 = ['index', 'title', 'game_id', 'game_url', 'img_url',
             'release_date', 'price', 'score_rank', 'owners']
rows_1 = zip(index, title, game_id, game_url, img_url,
             release_date, price, score_rank, owners)
dataframe_1 = pd.DataFrame(rows_1, columns=columns_1)

In [312]:
dataframe_1

Unnamed: 0,index,title,game_id,game_url,img_url,release_date,price,score_rank,owners
0,1,Evil West,1065310,https://steamspy.com/app/1065310,https://steamcdn-a.akamaihd.net/steam/apps/106...,2022-11-21,49.99,N/A (N/A),"0 .. 20,000"
1,2,Marvel’s Spider-Man: Miles Morales,1817190,https://steamspy.com/app/1817190,https://steamcdn-a.akamaihd.net/steam/apps/181...,2022-11-18,49.99,N/A (N/A),"50,000 .. 100,000"
2,3,The Dark Pictures Anthology: The Devil in Me,1567020,https://steamspy.com/app/1567020,https://steamcdn-a.akamaihd.net/steam/apps/156...,2022-11-17,39.99,N/A (N/A),"50,000 .. 100,000"
3,4,Frozen Flame,715400,https://steamspy.com/app/715400,https://steamcdn-a.akamaihd.net/steam/apps/715...,2022-11-17,25.49,N/A (N/A),"200,000 .. 500,000"
4,5,ZERO Sievert,1782120,https://steamspy.com/app/1782120,https://steamcdn-a.akamaihd.net/steam/apps/178...,2022-11-15,17.99,N/A (N/A),"50,000 .. 100,000"
...,...,...,...,...,...,...,...,...,...
95,96,Titan Station,1881120,https://steamspy.com/app/1881120,https://steamcdn-a.akamaihd.net/steam/apps/188...,2022-11-11,14.99,N/A (N/A),"0 .. 20,000"
96,97,Timeloop: Sink Again Beach,2094750,https://steamspy.com/app/2094750,https://steamcdn-a.akamaihd.net/steam/apps/209...,2022-11-16,7,N/A (N/A),"20,000 .. 50,000"
97,98,Meowjiro,2157340,https://steamspy.com/app/2157340,https://steamcdn-a.akamaihd.net/steam/apps/215...,2022-11-08,4.99,N/A (N/A),"0 .. 20,000"
98,99,The Impregnation of the Elves: Conquest of the...,2108290,https://steamspy.com/app/2108290,https://steamcdn-a.akamaihd.net/steam/apps/210...,2022-11-15,10.99,N/A (N/A),"0 .. 20,000"


We're good to go with the first dataset. Now let's get the second set.

In [317]:
# Structure of each container
set_2[0].select('td')

[<td>1</td>,
 <td data-order="Grapple Whip"><a href="/app/2211120"><img class="img-ss-list" src="https://steamcdn-a.akamaihd.net/steam/apps/2211120/capsule_184x69.jpg"/> Grapple Whip</a></td>,
 <td class="treleasedate" data-order="2022-11-22">Nov 22, 2022</td>,
 <td class="tprice" data-order="119">$1.19</td>,
 <td class="tuserscore" data-order="0">N/A (N/A)</td>,
 <td data-order="0">0 .. 20,000</td>,
 <td class="tplaytime" data-order="0">00:00 (00:00)</td>]

These are the contents of every container of the second set:

1. Index
2. Title, image url and link to a detailed view of the game (including game id).
3. Release date
4. Price
5. Score rank
6. Owners
7. Play time (average and median in parenthesis)

That is to say, the same content as the first set, plus the field "play time", which at first sight has no values of interest (only zeros).

In [333]:
# # Exploration
# set_2[0].select('td')[6].text.split(" ")[1].translate({ord('('): None, ord(')'): None})

'00:00'

In [352]:
# One list per future dataframe column; the next cell zips them into dataframe_2
index, title, game_id, game_url, img_url = [], [], [], [], []
release_date, price, score_rank, owners = [], [], [], []
play_time_avg, play_time_median = [], []

# Each <tr> of the second table holds seven <td> cells:
# rank, title (with link + image), release date, price, score rank, owners, play time
for row in set_2:
    cells = row.select('td')
    name_cell = cells[1]

    index.append(int(cells[0].text))
    title.append(name_cell['data-order'])

    href = name_cell.select('a')[0]['href']  # e.g. "/app/2211120"
    game_id.append(href.split('/')[2])
    game_url.append('https://steamspy.com' + href)

    img_url.append(name_cell.select('img')[0]['src'])
    release_date.append(cells[2]['data-order'])
    price.append(cells[3].text.replace('$', ''))
    score_rank.append(cells[4].text)
    owners.append(cells[5].text)

    # Play time looks like "00:00 (00:00)": average first, median in parentheses
    playtime_parts = cells[6].text.split(" ")
    play_time_avg.append(playtime_parts[0])
    play_time_median.append(playtime_parts[1].replace('(', '').replace(')', ''))

In [353]:
# Assemble the column lists of set 2 into a dataframe (one row per game)
columns_2 = ['index', 'title', 'game_id', 'game_url', 'img_url', 'release_date', 'price',
             'score_rank', 'owners', 'play_time_avg', 'play_time_median']
rows_2 = zip(index, title, game_id, game_url, img_url, release_date, price,
             score_rank, owners, play_time_avg, play_time_median)
dataframe_2 = pd.DataFrame(rows_2, columns=columns_2)

Now let's get the final set.

In [339]:
# Structure of each container
set_3[0].select('td')

[<td>1</td>,
 <td data-order="Counter-Strike: Global Offensive"><a href="/app/730"><img class="img-ss-list" src="https://steamcdn-a.akamaihd.net/steam/apps/730/capsule_184x69.jpg"/> Counter-Strike: Global Offensive</a></td>,
 <td class="treleasedate" data-order="2012-08-21">Aug 21, 2012</td>,
 <td class="tprice" data-order="0">Free</td>,
 <td class="tuserscore" data-order="0">N/A (N/A/83%)</td>,
 <td data-order="50,000,000">50,000,000 .. 100,000,000</td>,
 <td data-order="12.59">12.59%</td>,
 <td class="tplaytime" data-order="812">13:32 (04:41)</td>]

The contents are almost the same as the second set, except that now we do have playtime data and also a field with the percentage of owners that actually launched the game in the last two weeks.

In [354]:
# One list per future dataframe column; the next cell zips them into dataframe_3
index, title, game_id, game_url, img_url = [], [], [], [], []
release_date, price, score_rank, owners = [], [], [], []
players_launch = []
play_time_avg, play_time_median = [], []

# Each <tr> of the third table holds eight <td> cells:
# rank, title (with link + image), release date, price, score rank, owners,
# share of owners who launched the game, play time
for row in set_3:
    cells = row.select('td')
    name_cell = cells[1]

    index.append(int(cells[0].text))
    title.append(name_cell['data-order'])

    href = name_cell.select('a')[0]['href']  # e.g. "/app/730"
    game_id.append(href.split('/')[2])
    game_url.append('https://steamspy.com' + href)

    img_url.append(name_cell.select('img')[0]['src'])
    release_date.append(cells[2]['data-order'])
    price.append(cells[3].text.replace('$', ''))
    score_rank.append(cells[4].text)
    owners.append(cells[5].text)
    players_launch.append(cells[6].text)

    # Play time looks like "13:32 (04:41)": average first, median in parentheses
    playtime_parts = cells[7].text.split(" ")
    play_time_avg.append(playtime_parts[0])
    play_time_median.append(playtime_parts[1].replace('(', '').replace(')', ''))

In [355]:
# Assemble the column lists of set 3 into a dataframe (one row per game)
columns_3 = ['index', 'title', 'game_id', 'game_url', 'img_url', 'release_date', 'price',
             'score_rank', 'owners', 'players_launch', 'play_time_avg', 'play_time_median']
rows_3 = zip(index, title, game_id, game_url, img_url, release_date, price,
             score_rank, owners, players_launch, play_time_avg, play_time_median)
dataframe_3 = pd.DataFrame(rows_3, columns=columns_3)

In [356]:
dataframe_3

Unnamed: 0,index,title,game_id,game_url,img_url,release_date,price,score_rank,owners,players_launch,play_time_avg,play_time_median
0,1,Counter-Strike: Global Offensive,730,https://steamspy.com/app/730,https://steamcdn-a.akamaihd.net/steam/apps/730...,2012-08-21,Free,N/A (N/A/83%),"50,000,000 .. 100,000,000",12.59%,13:32,04:41
1,2,Dota 2,570,https://steamspy.com/app/570,https://steamcdn-a.akamaihd.net/steam/apps/570...,2013-07-09,Free,N/A (N/A/90%),"200,000,000 .. 500,000,000",11.38%,28:53,18:16
2,3,Call of Duty: Modern Warfare II,1938090,https://steamspy.com/app/1938090,https://steamcdn-a.akamaihd.net/steam/apps/193...,2022-10-27,69.99,N/A (N/A),"2,000,000 .. 5,000,000",73.98%,12:23,04:28
3,4,Apex Legends,1172470,https://steamspy.com/app/1172470,https://steamcdn-a.akamaihd.net/steam/apps/117...,2020-11-04,Free,N/A (N/A/88%),"20,000,000 .. 50,000,000",14.29%,13:15,06:30
4,5,PUBG: BATTLEGROUNDS,578080,https://steamspy.com/app/578080,https://steamcdn-a.akamaihd.net/steam/apps/578...,2017-12-21,Free,N/A (N/A),"50,000,000 .. 100,000,000",6.5%,11:17,03:18
...,...,...,...,...,...,...,...,...,...,...,...,...
379,380,The Dark Pictures Anthology: The Devil in Me,1567020,https://steamspy.com/app/1567020,https://steamcdn-a.akamaihd.net/steam/apps/156...,2022-11-17,39.99,N/A (N/A),"50,000 .. 100,000",100%,06:21,06:26
380,381,Gold Rush: The Game,451340,https://steamspy.com/app/451340,https://steamcdn-a.akamaihd.net/steam/apps/451...,2017-10-13,9.99,N/A (N/A),"500,000 .. 1,000,000",4.17%,08:44,05:49
381,382,Train Sim World 2,1282590,https://steamspy.com/app/1282590,https://steamcdn-a.akamaihd.net/steam/apps/128...,2020-08-20,,N/A (N/A),"500,000 .. 1,000,000",4%,06:57,02:19
382,383,觅长生,1189490,https://steamspy.com/app/1189490,https://steamcdn-a.akamaihd.net/steam/apps/118...,2019-11-26,10.79,N/A (N/A),"500,000 .. 1,000,000",8.57%,01:09,01:08


### Individual game page scraping

Ok, now that we've got our datasets, let's make some more requests to enrich the data.  
We'll make a request for each game's page using the game_url field.

In [602]:
test_url = dataframe_3.game_url[0]

In [603]:
# Fetch the detail page of one game. Without a timeout a stalled connection
# would block this cell forever, so cap the wait (seconds).
response = requests.get(test_url, timeout=30)
# Decode response.text as UTF-8 regardless of what requests would guess
response.encoding ='utf-8'

In [604]:
response.status_code

200

In [605]:
# Parsing document response using BeautifulSoup class
soup = bs4.BeautifulSoup(response.text, 'html.parser')

print(soup.title.text)

Counter-Strike: Global Offensive - SteamSpy - All the data and stats about Steam games


In [606]:
soup

<!DOCTYPE html>

<html>
<head>
<meta content="text/html;charset=utf-8" http-equiv="content-type"/>
<meta charset="utf-8"/>
<title>Counter-Strike: Global Offensive - SteamSpy - All the data and stats about Steam games</title>
<!-- BEGIN Facebook meta -->
<meta content="Counter-Strike: Global Offensive - " property="og:title"/>
<meta content="SteamSpy - All the data about Steam games" property="og:site_name"/>
<meta content="Steam Spy automatically gathers data from Steam user profiles, analyzes it and presents in simple, yet beautiful, manner. Steam Spy is designed to be helpful for indie developers, journalists, students and all parties interested in PC gaming and its current state of affairs." property="og:description"/>
<meta content="en_US" property="og:locale"/>
<meta content="https://www.facebook.com/galyonkin" property="article:author"/>
<meta content="https://steamspy.com/assets/img/mascot.png" property="og:image"/>
<meta content="width=device-width, initial-scale=1.0, maximum-s

In [563]:
# Data from text page
soup.select('.p-r-30')[0].select('strong')
# soup.select('.p-r-30')[0].find_all('strong', text='Genre:')[0]

[<strong>Developer:</strong>,
 <strong>Publisher:</strong>,
 <strong>Genre:</strong>,
 <strong>Languages:</strong>,
 <strong>Tags:</strong>,
 <strong>Category:</strong>,
 <strong>Release date</strong>,
 <strong>Free</strong>,
 <strong>Old userscore:</strong>,
 <strong>Metascore:</strong>,
 <strong>Owners</strong>,
 <strong>Followers</strong>,
 <strong>Peak concurrent players yesterday</strong>,
 <strong>YouTube stats</strong>,
 <strong>Playtime in the last 2 weeks:</strong>,
 <strong>Playtime total:</strong>]

In [597]:
# Data from text page
# soup.select('.p-r-30')[0].select('Strong')
# soup.select('.p-r-30')[0].select('trong')[0].next_sibling.next_sibling
a = soup.select('.p-r-30')[0].find_all('strong', text='Playtime total:')[0].next_sibling
b = a#.next_sibling#.text#next_sibling.text#.next_sibling.next_sibling.text
print(b.text)
print(type(b))
print(b.text=="")
print(isinstance(b,bs4.element.Tag))

 485:20 (average) 96:37 (median) 
<class 'bs4.element.NavigableString'>
False
False


In [599]:
def _first_sibling_after(bsoup, label):
    """Return the node that directly follows the ``<strong>`` labelled ``label``.

    Only the first ``.p-r-30`` container of the page is searched. Returns
    ``None`` when the container or the label is missing, or when the label is
    the last node of its parent.
    """
    containers = bsoup.select('.p-r-30')
    if not containers:
        return None

    # `string=` is the current name of the filter formerly spelled `text=`
    label_tag = containers[0].find('strong', string=label)
    if label_tag is None:
        return None

    return label_tag.next_sibling


def _collect_tag_texts(bsoup, label, stop_label):
    """Collect the text of every non-empty tag between ``label`` and the next label.

    Walks the siblings that follow the ``label`` heading and stops at the node
    whose text is ``stop_label``, at any other ``<strong>`` heading (so a page
    that lacks the ``stop_label`` section does not leak into the next ones), or
    when the siblings run out. Plain strings (separators) and empty tags are
    skipped. Returns ``[]`` when the label is not on the page.
    """
    texts = []
    node = _first_sibling_after(bsoup, label)

    while node is not None and node.text != stop_label:
        if isinstance(node, bs4.element.Tag):
            if node.name == 'strong':
                break
            if node.text != "":
                texts.append(node.text)
        node = node.next_sibling

    return texts


def _last_string_after(bsoup, label):
    """Return the last non-empty plain string directly following ``label``.

    Only the run of consecutive plain strings right after the heading is
    considered; the text is returned untouched (surrounding whitespace and any
    leading punctuation included). Returns ``None`` when the label is missing or
    is not followed by a plain string.
    """
    value = None
    node = _first_sibling_after(bsoup, label)

    while isinstance(node, bs4.element.NavigableString):
        if node.text != "":
            value = node.text
        node = node.next_sibling

    return value


def get_developers(bsoup):
    """Return the list of developer names found after the "Developer:" label."""
    return _collect_tag_texts(bsoup, 'Developer:', 'Publisher:')


def get_publishers(bsoup):
    """Return the list of publisher names found after the "Publisher:" label."""
    return _collect_tag_texts(bsoup, 'Publisher:', 'Genre:')


def get_genre(bsoup):
    """Return the list of genres found after the "Genre:" label."""
    return _collect_tag_texts(bsoup, 'Genre:', 'Languages:')


def get_languages(bsoup):
    """Return the text of the tags found after the "Languages:" label."""
    return _collect_tag_texts(bsoup, 'Languages:', 'Tags:')


def get_tags(bsoup):
    """Return the list of tags found after the "Tags:" label."""
    return _collect_tag_texts(bsoup, 'Tags:', 'Category:')


def get_categories(bsoup):
    """Return the categories listed after the "Category:" label.

    The categories are read from the plain text that follows the heading, split
    on ", " and stripped. Siblings are scanned until the first node with empty
    text (or the end of the siblings); if several plain strings are met, the
    last one wins. Returns ``[]`` when nothing is found.
    """
    categories = []
    node = _first_sibling_after(bsoup, 'Category:')

    while node is not None and node.text != "":
        if isinstance(node, bs4.element.NavigableString):
            categories = [part.strip() for part in node.text.split(", ")]
        node = node.next_sibling

    return categories


def get_followers(bsoup):
    """Return the raw text following the "Followers" label, or ``None``."""
    return _last_string_after(bsoup, 'Followers')


def get_peak_players(bsoup):
    """Return the raw text following "Peak concurrent players yesterday", or ``None``."""
    return _last_string_after(bsoup, 'Peak concurrent players yesterday')


def get_youtube_stats(bsoup):
    """Return the raw text following the "YouTube stats" label, or ``None``."""
    return _last_string_after(bsoup, 'YouTube stats')


def get_total_playtime(bsoup):
    """Return the raw text following the "Playtime total:" label, or ``None``."""
    return _last_string_after(bsoup, 'Playtime total:')

In [600]:
#from unidecode import unidecode

In [601]:
get_total_playtime(soup)

' 485:20 (average) 96:37 (median) '

#### Time series

We can also find some time series in the HTML document. Curiously enough, at the time of developing this notebook, the page doesn't show the graphs for the Twitch data (which is still available by scraping the HTML). I remember the graphs were being shown yesterday when I was analyzing the webpage.

In [631]:
# All charts data
soup.select('.panel-body')[0].find_all("script")

[<script>
 var data2ccu=[   
 {
 "key": "PCCU",
 "bar": true,
 "values": [
 [1667347200000, 732350, ""],
 [1667433600000, 763997, ""],
 [1667520000000, 975497, ""],
 [1667606400000, 1076999, ""],
 [1667692800000, 874053, "#2B6A94"],
 [1667779200000, 853943, ""],
 [1667865600000, 949775, ""],
 [1667952000000, 890589, ""],
 [1668038400000, 755381, ""],
 [1668124800000, 811294, ""],
 [1668211200000, 1105409, ""],
 [1668297600000, 999216, "#2B6A94"],
 [1668384000000, 760561, ""],
 [1668470400000, 734884, ""],
 [1668556800000, 851213, ""],
 [1668643200000, 958445, ""],
 [1668729600000, 1012231, ""],
 [1668816000000, 1132952, ""],
 [1668902400000, 996069, "#2B6A94"],
 [1668988800000, 867454, ""],
 [1669075200000, 864771, ""],
 [1669161600000, 740666, ""]]}];
 </script>,
 <script>
 var data2hccu=[   
 {
 "key": "HCCU",
 "values": [
 [1665230402000, 817545],
 [1665248402000, 736953],
 [1665266402000, 549822],
 [1665284401000, 418928],
 [1665298801000, 555019],
 [1665316802000, 836347],
 [16653

In [670]:
a = soup.select('.panel-body')[0].find_all("script")[2]

b = a#.next_sibling#.text#next_sibling.text#.next_sibling.next_sibling.text
print(b.text)#.split("[", 1)[1].rsplit("]", 1)[0])
print(type(b))
print(b.text=="")
print(isinstance(b,bs4.element.Tag))


var datadtwitch=[
{
"key": "Viewers",
"bar": true,
"values": [
[1667347200000,, "#ffffff" ],
[1667433600000,, "#ffffff" ],
[1667520000000,, "#ffffff" ],
[1667606400000,, "#ffffff" ],
[1667692800000,, "#ffffff" ],
[1667779200000,, "#ffffff" ],
[1667865600000,, "#ffffff" ],
[1667952000000,, "#ffffff" ],
[1668038400000,, "#ffffff" ],
[1668124800000,, "#ffffff" ],
[1668211200000,, "#ffffff" ],
[1668297600000,, "#ffffff" ],
[1668384000000,, "#ffffff" ],
[1668470400000,, "#ffffff" ],
[1668556800000,, "#ffffff" ],
[1668643200000,, "#ffffff" ],
[1668729600000,, "#ffffff" ],
[1668816000000,, "#ffffff" ],
[1668902400000,, "#ffffff" ],
[1668988800000,, "#ffffff" ],
[1669075200000,, "#ffffff" ],
[1669161600000,, "#ffffff" ]]},{
"key" : "Channels",
"values" : [
[1667347200000,, "#ffffff" ],
[1667433600000,, "#ffffff" ],
[1667520000000,, "#ffffff" ],
[1667606400000,, "#ffffff" ],
[1667692800000,, "#ffffff" ],
[1667779200000,, "#ffffff" ],
[1667865600000,, "#ffffff" ],
[1667952000000,, "#ff

In [640]:
import json

In [None]:
json.loads 

In [671]:
# Extract the JSON payload of the first two chart scripts (PCCU and HCCU).
# Only these two are read: the Twitch chart scripts that follow contain empty
# slots such as "[1667347200000,, ...]", which is not valid JSON.

chart_list = ["pccu", "hccu"]
# Look the <script> tags up once instead of on every iteration
chart_scripts = soup.select('.panel-body')[0].find_all("script")

json_list = []  # raw JSON text, one entry per chart, same order as chart_list
for position, chart_name in enumerate(chart_list):
    script = chart_scripts[position]
    # Each script reads `var dataXXX=[ {...} ];` -> keep what lies between the
    # first "[" and the last "]", i.e. the single {...} object, on one line
    payload = script.text.split("[", 1)[1].rsplit("]", 1)[0]
    json_list.append(payload.replace("\n", "").replace("\r", ""))

# Parsed version of the same payloads: chart name -> dict with "key" and "values"
chart_data = {name: json.loads(raw) for name, raw in zip(chart_list, json_list)}

{'key': 'HCCU',
 'values': [[1665230402000, 817545],
  [1665248402000, 736953],
  [1665266402000, 549822],
  [1665284401000, 418928],
  [1665298801000, 555019],
  [1665316802000, 836347],
  [1665331201000, 788650],
  [1665345601000, 630709],
  [1665360001000, 358898],
  [1665374401000, 389535],
  [1665388802000, 492725],
  [1665403202000, 707259],
  [1665417602000, 737285],
  [1665432002000, 635728],
  [1665446402000, 362910],
  [1665460802000, 377052],
  [1665475201000, 482525],
  [1665489602000, 703566],
  [1665504002000, 719244],
  [1665518401000, 601328],
  [1665532801000, 319436],
  [1665547201000, 386614],
  [1665561601000, 480446],
  [1665579601000, 820875],
  [1665597601000, 692045],
  [1665615600000, 357892],
  [1665630001000, 359044],
  [1665644401000, 425631],
  [1665662401000, 697343],
  [1665680402000, 672231],
  [1665698402000, 442415],
  [1665712801000, 319263],
  [1665727201000, 433269],
  [1665741602000, 582780],
  [1665759602000, 955637],
  [1665774001000, 717619],
  