## Scraping statistics for the players in the fantasy league 

In [86]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

class TeamInfo():
    """Roster lookup for a fantasy team.

    Each roster maps a player's short display name to the path fragment
    that identifies the player in a basketball-reference.com URL.
    """

    def __init__(self, team_name = 'Rookies'):
        # Name of the fantasy team whose roster `players` will return.
        self.team = team_name

    def players(self):
        """Return a newly built ``{display name: player code}`` dict.

        Raises:
            KeyError: if ``self.team`` is not a known team name.
        """
        rookies_roster = {
            "D.Fox": "f/foxde01",
            "T.Haliburton": "h/halibty01",
            "S.Barnes": "b/barnesc01",
            "V.Wembanyama": "w/wembavi01",
            "A.Reaves": "r/reaveau01",
            "T.Murphy": "m/murphtr02",
            "D.DeRozan": "d/derozde01",
            "J.Williams": "w/willija06",
            "W.Kessler": "k/kesslwa01",
            "J.Johnson": "j/johnsja05",
            "A.Thompson": "t/thompau01",
        }
        rosters_by_team = {"Rookies": rookies_roster}

        return rosters_by_team[self.team]


class ExtractPlayerHistory():
    """Download one player's game log and reshape it into per-category lists.

    Args:
        player_name: Display name of the player; stored as ``self.player``.
        player_code: Path fragment identifying the player in the URL,
            e.g. ``"t/thompau01"``.
        season: Season identifier appended to the game-log URL. Defaults to
            2024, the value that was previously hard-coded.
        timeout: Seconds to wait for the HTTP response before giving up.
    """

    def __init__(self, player_name, player_code, season=2024, timeout=30):
        self.url = (
            "https://www.basketball-reference.com/players/"
            + player_code + "/gamelog/" + str(season)
        )
        self.player = player_name
        self.timeout = timeout
        # Category name -> index of the matching <td> cell within a table row.
        # Indexes 0, 4, 11, 14 and 17 are deliberately not collected
        # (presumably the game counter, home/away marker and percentage
        # columns -- confirm against the live table).
        self.categories = {
            "date":1,
            "age":2,
            "tm":3,
            "opp":5,
            "winlose":6,
            "gs":7,
            "mp":8,
            "fg":9,
            "fga":10,
            "3p":12,
            "3pa":13,
            "ft":15,
            "fta":16,
            "orb":18,
            "drb":19,
            "trb":20,
            "ast":21,
            "stl":22,
            "blk":23,
            "tov":24,
            "pf":25,
            "pts":26,
            "gmsc":27,
            "plusminus":28
        }
        # Result of the most recent make_dict() call (empty lists until then).
        self.stats = {cat: [] for cat in self.categories}

    def scrape_data(self):
        """Fetch the game-log page and return its table rows as lists of text.

        Returns:
            One list per ``<tr>`` in the table body, holding the stripped
            text of each ``<td>`` cell. Rows without ``<td>`` cells come
            back as empty lists.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            ValueError: if the expected stats table is not in the page.
        """
        page = requests.get(self.url, timeout=self.timeout)
        # Fail here with the real HTTP error instead of later on a missing table.
        page.raise_for_status()
        soup = BeautifulSoup(page.content, "html.parser")
        table = soup.find('table', attrs={'class':'row_summable sortable stats_table'})
        table_body = table.find('tbody') if table is not None else None
        if table_body is None:
            raise ValueError("No game-log table found at " + self.url)

        return [
            [cell.text.strip() for cell in row.find_all('td')]
            for row in table_body.find_all('tr')
        ]

    def make_dict(self, data):
        """Convert scraped rows into ``{category: [value per game, ...]}``.

        Rows too short to contain a date (including empty rows) are skipped.
        Rows that contain a date but not every collected column are recorded
        with their date and ``"Inactive"`` for every other category; this
        includes the 8-cell rows the original code special-cased.

        Each call starts from empty lists, so repeated calls do not
        accumulate duplicates. The result is also stored on ``self.stats``.

        Args:
            data: Iterable of rows as returned by ``scrape_data``.

        Returns:
            Dict mapping every key of ``self.categories`` to a list with one
            entry per retained row.
        """
        date_index = self.categories["date"]
        full_row_length = max(self.categories.values()) + 1
        player_stats = {cat: [] for cat in self.categories}

        for gameday in data:
            if len(gameday) <= date_index:
                continue
            has_all_columns = len(gameday) >= full_row_length
            for cat, index in self.categories.items():
                if has_all_columns or cat == "date":
                    player_stats[cat].append(gameday[index])
                else:
                    player_stats[cat].append("Inactive")

        self.stats = player_stats
        return player_stats

In [87]:
# Roster handle for the "Rookies" fantasy team.
rookies = TeamInfo(team_name='Rookies')

In [88]:
# Scrape each rostered player's game log and collect the results by name.
playerHistRookies = {}
roster = rookies.players()
for name in roster:
    code = roster[name]
    print(name)  # progress indicator: one line per player
    player = ExtractPlayerHistory(name, code)
    player_data = player.scrape_data()
    playerHistRookies[name] = player.make_dict(player_data)

D.Fox
T.Haliburton
S.Barnes
V.Wembanyama
A.Reaves
T.Murphy
D.DeRozan
J.Williams
W.Kessler
J.Johnson
A.Thompson


In [89]:
# Display the collected per-category lists for a single player.
playerHistRookies['A.Thompson']

{'date': ['2023-10-25',
  '2023-10-27',
  '2023-10-28',
  '2023-10-30',
  '2023-11-01',
  '2023-11-02',
  '2023-11-05',
  '2023-11-06',
  '2023-11-08',
  '2023-11-10',
  '2023-11-12',
  '2023-11-14',
  '2023-11-17',
  '2023-11-19',
  '2023-11-20',
  '2023-11-24',
  '2023-11-27',
  '2023-11-29',
  '2023-11-30',
  '2023-12-02',
  '2023-12-06',
  '2023-12-08',
  '2023-12-11',
  '2023-12-13'],
 'age': ['20-268',
  '20-270',
  '20-271',
  '20-273',
  '20-275',
  '20-276',
  '20-279',
  '20-280',
  '20-282',
  '20-284',
  '20-286',
  '20-288',
  '20-291',
  '20-293',
  '20-294',
  '20-298',
  '20-301',
  '20-303',
  '20-304',
  '20-306',
  '20-310',
  '20-312',
  '20-315',
  '20-317'],
 'tm': ['DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET',
  'DET'],
 'opp': ['MIA',
  'CHO',
  'CHI',
  'OKC',
  'POR',
  'NOP',
  'PHO',
  'GSW',
  'MIL',


In [90]:
import json

# Persist the scraped history for every player as a single JSON document.
with open('rookiesDec13.json', mode='w') as out_file:
    json.dump(obj=playerHistRookies, fp=out_file)