In [1]:
import bs4 , requests, re
import numpy as np
import pandas as pd
import datetime



In [2]:
today = str(datetime.datetime.now())[0:10]

In [5]:
class NBAScrapper():
    def __init__(self, url, scraped_date):
        self.scraped_date = scraped_date
        self.url = url
        
        self.soup = self.getHTML()
        self.player_table = self.getPlayerData()
        
    def getHTML(self):
        res = requests.get(self.url)
        soup = bs4.BeautifulSoup(res.text)
        return soup
    
    def getPlayerData(self):
        table = self.soup.find_all({'tr' : 'table table-bordered table-striped table-hover player-table tablesorter'})
        player_classes = [g.get_attribute_list('class') for g in table][1:]
        
        player_vals = []
        for player in player_classes:
            player_vals.append(self.soup.find_all('tr', {'class' : player[0]}))
            
        cleaned_player_data = []
        for player in player_vals:
            vals = [val.text for val in player]
            for v in vals:
                v = re.sub(pattern=r'\n', string = v, repl = ' ')
                v = v.split(' ')
                cleaned_player_data.append(v)
                
        player_dfs = []
        for v in cleaned_player_data:
            r = [int(s) for s in v[0] if s.isdigit()]
            data = {
                'rank' : ''.join(str(p_r) for p_r in r),
                'player_first_name' : ''.join([i for i in v[0] if not i.isdigit()]),
                'player_last_name' : v[1],
                'player_name' : ''.join([i for i in v[0] if not i.isdigit()]) + v[1],
                'team-pos' : ''.join(v[2:5]),
                'fg%' : v[6],
                'ft%' : v[7],
                '3pm' : v[8],
                'reb' : v[9],
                'ast' : v[10],
                'stl' : v[11],
                'blk' : v[12],
                'pts' : v[13],
                'overall' : v[14],
                'scraped_date' : self.scraped_date
            }
            player_df = pd.DataFrame([data])
            player_dfs.append(player_df)
        return pd.concat(player_dfs)

In [6]:
%time nba = NBAScrapper(url = 'https://www.fantasypros.com/nba/player-rater.php?range=ros', scraped_date=today)

CPU times: user 16.2 s, sys: 19.9 ms, total: 16.3 s
Wall time: 16.4 s


In [7]:
nba.player_table

Unnamed: 0,rank,player_first_name,player_last_name,player_name,team-pos,fg%,ft%,3pm,reb,ast,stl,blk,pts,overall,scraped_date
0,1,James,Harden,JamesHarden,"(HOU-PG,SG)",-0.66,4.32,3.77,1.02,3.01,2.50,0.69,3.64,18.28,2020-12-31
0,2,Anthony,Davis,AnthonyDavis,"(LAL-PF,C)",1.92,2.28,0.09,2.48,0.65,1.84,4.11,2.59,15.97,2020-12-31
0,3,Karl-Anthony,Towns,Karl-AnthonyTowns,(MIN-C),1.58,1.17,2.17,3.11,0.95,0.64,2.59,2.47,14.67,2020-12-31
0,4,Stephen,Curry,StephenCurry,"(GSW-PG,SG)",-0.01,3.35,4.10,0.27,2.27,1.74,-0.24,2.83,14.32,2020-12-31
0,5,Damian,Lillard,DamianLillard,(POR-PG),-0.51,3.65,3.15,0.20,2.89,0.91,-0.36,2.91,12.83,2020-12-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,453,Malachi,Flynn,MalachiFlynn,(TOR-PG),-0.95,0.00,-1.22,-1.61,-1.16,-1.67,-1.06,-1.54,-9.21,2020-12-31
0,454,Josh,Hall,JoshHall,(OKC-F),-0.95,0.00,-1.22,-1.61,-1.16,-1.67,-1.06,-1.54,-9.21,2020-12-31
0,455,Jared,Harper,JaredHarper,(NYK-PG),-0.95,0.00,-1.22,-1.61,-1.16,-1.67,-1.06,-1.54,-9.21,2020-12-31
0,456,Marques,Bolden,MarquesBolden,(CLE-C),0.55,-2.77,-1.22,-1.61,-1.16,-1.67,-1.06,-1.38,-10.32,2020-12-31
