In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np
from collections import namedtuple

In [25]:
def soupify_url(url, timeout=30):
    """Download a web page and parse it into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely. Defaults to 30.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the built-in ``html.parser``.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status code.
    requests.RequestException
        On connection failures or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of parsing an error page as data.
    response.raise_for_status()

    return BeautifulSoup(response.content, 'html.parser')

def get_table_data(table):
    """Convert a parsed HTML ``<table>`` element into a DataFrame.

    The first ``<tr>`` supplies the column names (its ``<th>`` cells); every
    following ``<tr>`` supplies one row of data (its ``<td>`` cells). All cell
    text is whitespace-stripped and kept as strings.

    Parameters
    ----------
    table : object
        A table element exposing ``find_all`` (e.g. a BeautifulSoup tag).

    Returns
    -------
    pandas.DataFrame
        One row per data ``<tr>``. A table with no ``<tr>`` at all yields an
        empty DataFrame.
    """
    rows = table.find_all('tr')

    # Guard against a table without any rows: there is no header row to read.
    if not rows:
        return pd.DataFrame()

    header_row, body_rows = rows[0], rows[1:]
    headers = [cell.text.strip() for cell in header_row.find_all('th')]
    data = [
        [cell.text.strip() for cell in row.find_all('td')]
        for row in body_rows
    ]

    return pd.DataFrame(data, columns=headers)

## Transaction History

In [26]:
# Transactions page to scrape; soupify_url downloads it and parses the HTML.
url = 'https://ottoneu.fangraphs.com/953/transactions'
soup = soupify_url(url)

In [27]:
# Text of the first <h1> inside <main>; presumably the page/table title.
table_name = soup.find('main').find('h1').get_text()

In [28]:
# First <table> element inside <main>.
table = soup.find('main').find('table')

In [33]:
# Turn the HTML table into a DataFrame (the first row supplies the column names).
transactions = get_table_data(table)

Pagination example: https://ottoneu.fangraphs.com/953/transactions?page=34 (page numbers are zero-indexed).

We might need to walk through every page, either with Selenium or by incrementing the `page` index.

Plan: schedule the scrape for every Sunday. Save the existing transactions to a CSV, add the new ones, and drop duplicates.
   - Scrape only the first page; if it contains no duplicates, scrape the next page as well.

In [34]:
# Preview the first rows of the scraped transactions.
transactions.head()

Unnamed: 0,Date,Transaction Type,Player Name,Team Name,From Team,Salary
0,"Apr 21, 2019 13:56:17",cut,Scott Schebler,VORP Speed,,$1
1,"Apr 21, 2019 13:55:01",add,Scott Schebler,VORP Speed,,$2
2,"Apr 21, 2019 11:46:58",cut,Nick Pivetta,Bobblehead,,$2
3,"Apr 20, 2019 23:10:32",cut,Jon Lester,St. Clair Gorillas,,$4
4,"Apr 20, 2019 16:49:01",add,Hector Neris,Clueless Joes,,$5


In [65]:
# Count how many scraped transactions belong to each team, most frequent first.
transactions['Team Name'].value_counts()

Bobblehead            10
Enders Game            9
Urbina Blight          9
VORP Speed             5
St. Clair Gorillas     4
Clueless Joes          4
the balking dead       3
Peons                  3
The Hobo Jungle        2
Scotty Smalls          1
Name: Team Name, dtype: int64

## Individual Auctions

In [95]:
# probably need Selenium to comb through the auction results
auction_results_url = 'https://ottoneu.fangraphs.com/953/auctionresults?id=384614'
# Reuse the notebook's shared fetch-and-parse helper instead of repeating the
# requests/BeautifulSoup boilerplate inline.
ar_soup = soupify_url(auction_results_url)

In [96]:
# Split the first <h2> inside <main> on whitespace, giving a list of tokens
# such as ['Auction', 'for', 'Sam', 'Gaviglio', 'TOR', 'SP/RP', 'R'].
ar_head = ar_soup.find('main').find('h2').get_text().split()

In [97]:
# ar_head is already a list of tokens (it is split where it is assigned), so
# display it directly; calling .split() on a list raises AttributeError.
ar_head

['Auction', 'for', 'Sam', 'Gaviglio', 'TOR', 'SP/RP', 'R']

In [106]:
# Assumed token layout (matches the sample header and the negative indices
# used below): 'Auction', 'for', <name tokens...>, <team>, <position>, <hand>.
# Slice the name by position rather than filtering on str.istitle(), which
# silently drops name tokens such as 'McCutchen' or 'deGrom'.
player_name = ar_head[2:-3]
player_team = ar_head[-3]
player_pos = ar_head[-2]
player_hand = ar_head[-1]

In [99]:
# Take the second <h3> on the page, drop its first three whitespace-separated
# words and re-join the remainder with single spaces.
# NOTE(review): presumably the three leading words are a fixed label that
# precedes the team name — confirm against the live page.
started_by = ' '.join(ar_soup.find_all('h3')[1].get_text().split()[3:])

In [100]:
# Display the extracted value.
started_by

'St. Clair Gorillas'

In [101]:
# First <table> element on the auction page (presumably the list of bids).
all_bids = ar_soup.find('table')

In [102]:
bid_rows = all_bids.find_all('tr')

# One list of stripped cell texts per table row: the first row is read from
# its <th> cells (column titles), every later row from its <td> cells.
bid_data = [
    [cell.text.strip() for cell in tr.find_all('th' if position == 0 else 'td')]
    for position, tr in enumerate(bid_rows)
]

In [103]:
# Display the scraped rows (header row first, then one entry per bid row).
bid_data

[['Team', 'Bid'], ['Clueless Joes', '$5'], ['St. Clair Gorillas', '$1']]

## League Finances

In [3]:
# League tools page; soupify_url downloads it and parses the HTML.
finances_url = 'https://ottoneu.fangraphs.com/953/tools'
fin_soup = soupify_url(finances_url)

In [9]:
# Grab the table inside the second <section class="section-container"> (index 1).
# NOTE(review): this is a positional lookup — it will pick the wrong table or
# raise IndexError if the page layout changes.
fin_table = fin_soup.find_all('section', {'class':'section-container'})[1].find('table')

In [35]:
# Turn the HTML table into a DataFrame (the first row supplies the column names).
finances_df = get_table_data(fin_table)

TODO: also scrape each individual player's transaction history?

In [36]:
# Preview the first rows of the league finances table.
finances_df.head()

Unnamed: 0,Team,Players,Spots,Base Salaries,Vote Off,Cap Penalties,Incoming Loans,Outgoing Loans,Available Cap Space
0,Clueless Joes,41,41,$249,$0,$0,$0,$0,$151
1,St. Clair Gorillas,38,41,$387,$0,$8,$0,$0,$5
2,Launch Angela Merkel,38,40,$370,$0,$0,$0,$0,$30
3,Enders Game,40,40,$385,$0,$9,$0,$0,$6
4,Bobblehead,38,40,$333,$0,$18,$0,$0,$49


In [38]:
# Inspect column dtypes: the cells were scraped as text, so numeric columns
# still need an explicit conversion.
finances_df.dtypes

Team                   object
Players                object
Spots                  object
Base Salaries          object
Vote Off               object
Cap Penalties          object
Incoming Loans         object
Outgoing Loans         object
Available Cap Space    object
dtype: object

In [41]:
# Strip the literal dollar sign, then convert the salaries to integers.
# regex=False is spelled out because '$' is the end-of-string anchor when
# treated as a regular expression, and the default of `regex` differs between
# pandas versions.
finances_df['Base Salaries'].str.replace('$', '', regex=False).astype(int)

0     249
1     387
2     370
3     385
4     333
5     383
6     365
7     369
8     399
9     379
10    379
11    363
Name: Base Salaries, dtype: int64

In [39]:
# Roster sizes were scraped as text; view them as integers.
finances_df['Players'].astype(int)

0     41
1     38
2     38
3     40
4     38
5     38
6     41
7     40
8     41
9     39
10    41
11    41
Name: Players, dtype: int64