In [1]:
#Install/import necessary packages
!pip install mtgsdk
from mtgsdk import Card
from bs4 import BeautifulSoup

import requests
import pandas as pd
import json
import pickle
from pandas.io.json import json_normalize



# CARD DATA SCRAPING 

If you don't have the data/modernCardsUnprocessed.pickle file, run these cells:

In [8]:
#Get all modern cards
cards = Card.where(gameFormat='modern').all()
cardDicts = [c.__dict__ for c in cards]
cardsDf = pd.DataFrame(cardDicts)

In [9]:
with open('data/modernCardsUnprocessed.pickle','wb') as pickle_out:
    pickle.dump(cardsDf,pickle_out)

Otherwise, run these cell:

In [13]:
#Open unprocessed data
with open('data/modernCardsUnprocessed.pickle','rb') as pickle_in:
    cardsDf = pickle.load(pickle_in)

In [14]:
#Filter out the lands
types = cardsDf['types']

nonlandFilter = ["Land" not in t for t in types]

nonLands = pd.DataFrame(cardsDf.loc[nonlandFilter])

In [15]:
#This will fix list columns with null values and join lists by commas
def fixListCol(el):
    if el == None:
        return ''
    elif type(el) == list:
        return ','.join(el)
    return el

In [16]:
# Apply the fix to relevant columns
nonLands['supertypes'] = nonLands['supertypes'].apply(fixListCol)
nonLands['types'] = nonLands['types'].apply(fixListCol)
nonLands['subtypes'] = nonLands['subtypes'].apply(fixListCol)
nonLands['color_identity'] = nonLands['color_identity'].apply(fixListCol)


In [17]:
#Select the relevant columns and save the data
cardsDfFinal = nonLands[['artist','border','cmc','color_identity','layout','loyalty','mana_cost','name','power','rarity','release_date','source','subtypes','supertypes','text','timeshifted','toughness','types','set']]

print(cardsDfFinal.head())

with open('data/modernCardsFinal.pickle','wb') as pickle_out:
    pickle.dump(cardsDfFinal,pickle_out)

            artist border  cmc color_identity  layout  loyalty  mana_cost  \
0      Warren Mahy   None    4              R  normal      NaN     {3}{R}   
16  Douglas Shuler   None    5              W  normal      NaN  {3}{W}{W}   
17  Trevor Claxton   None    4              G  normal      NaN  {2}{G}{G}   
18            Izzy   None    3            W,U  normal      NaN  {1}{W}{U}   
19    Steve Argyle   None    2              W  normal      NaN     {1}{W}   

                           name power   rarity release_date  \
0             Steamflogger Boss     3     Rare         None   
16                  Serra Angel     4  Special   1999-09-04   
17                    Vengevine     4  Special         2013   
18         Geist of Saint Traft     2  Special         2014   
19  Thalia, Guardian of Thraben     2  Special         2015   

                                               source       subtypes  \
0                                                None  Goblin,Rigger   
16  Wizards of

In [None]:
with open('data/modernCardsFinal.pickle','rb') as pickle_in:
    cardsDfFinal = pickle.load(pickle_in)

# PRICE SCRAPING

If the file already exists in the data directory

In [9]:
with open('data/modernPrices.pickle','rb') as pickle_in:
    pricesDF = pickle.load(pickle_in)

If the data needs to be scraped

In [5]:
#This will scrape the prices from the formats included in the given format parameter (i.e. 'Modern' will scrape both Standard and Modern)
def scrapeSelectPage(url,formt):
    #Get the page
    r = requests.get(url)
    html_doc = r.text
    soup = BeautifulSoup(html_doc,'lxml')
    
    #Get the format blocks from the page
    formats = soup.find_all('div',class_='priceList-setMenu')
    pricesDF = pd.DataFrame()
    for form in formats:
        pricesDF = pricesDF.append(scrapeFormat(form))
        
        #Check if we've reached the desired format
        if 'priceList-setMenu-'+formt in form.attrs['class']:
            break
        
    return pricesDF

#Scrapes a format div from the /prices/select page
def scrapeFormat(soup):
    #Select the first li element in the format's div
    element = soup.li
    
    #Skip the first "Quick Prices" block
    element = element.find_next_sibling('li',class_='priceList-setMenu-header')
    
    #Create empty dataframe to append to
    formatDF = pd.DataFrame()
    
    #Append the Dataframes obtained by scraping each set
    while(element.find_next_sibling('li') != None):
        #Iterate
        element = element.find_next_sibling('li')
        
        #The set elements don't have classes associated with them
        if 'class' not in element.attrs:
            #Scrape the cards in the set's page and add the name of the set
            setDF = scrapeSet('https://www.mtggoldfish.com'+element.a.attrs['href']+'#paper')
            setDF['setname'] = element.get_text().strip()
            
            formatDF = formatDF.append(setDF)
            
    return formatDF

#Scrapes all of the prices from the given set page
def scrapeSet(url):
    #Get the page
    r = requests.get(url)
    html_doc = r.text
    soup = BeautifulSoup(html_doc,'lxml')
    
    #Select the cards table
    element = soup.find('table',class_='tablesorter-bootstrap-popover-online').tbody
    
    #Build a list that will be filled with dictionaries to be converted to a DataFrame
    cardList = []
    
    #Loop through rows and scrape the name and price
    for row in element.find_all('tr'):
        #Get the td elements
        row = row.find_all('td')
        
        cardList.append({'name':row[0].a.get_text().strip(),'price':row[3].get_text().strip(),'set':row[1].get_text()})
    
    return pd.DataFrame(cardList)

In [6]:
pricesDF = scrapeSelectPage('https://www.mtggoldfish.com/prices/select','Modern')

                       name  price  set           setname
0        Rekindling Phoenix  20.64  RIX  Rivals of Ixalan
1  Kumena, Tyrant of Orazca   8.02  RIX  Rivals of Ixalan
2  Angrath, Minotaur Pirate   6.00  RIX  Rivals of Ixalan
3   Vraska, Scheming Gorgon   5.00  RIX  Rivals of Ixalan
4          Jadelight Ranger   3.83  RIX  Rivals of Ixalan
<class 'pandas.core.frame.DataFrame'>
Int64Index: 13816 entries, 0 to 285
Data columns (total 4 columns):
name       13816 non-null object
price      13816 non-null object
set        13816 non-null object
setname    13816 non-null object
dtypes: object(4)
memory usage: 539.7+ KB


In [8]:
with open('data/modernPrices.pickle','wb') as pickle_out:
    pickle.dump(pricesDF,pickle_out)

Look at the DF

In [10]:
print(pricesDF.head())
pricesDF.info()

                       name  price  set           setname
0        Rekindling Phoenix  20.64  RIX  Rivals of Ixalan
1  Kumena, Tyrant of Orazca   8.02  RIX  Rivals of Ixalan
2  Angrath, Minotaur Pirate   6.00  RIX  Rivals of Ixalan
3   Vraska, Scheming Gorgon   5.00  RIX  Rivals of Ixalan
4          Jadelight Ranger   3.83  RIX  Rivals of Ixalan
<class 'pandas.core.frame.DataFrame'>
Int64Index: 13816 entries, 0 to 285
Data columns (total 4 columns):
name       13816 non-null object
price      13816 non-null object
set        13816 non-null object
setname    13816 non-null object
dtypes: object(4)
memory usage: 539.7+ KB


# MELD

In [34]:
cardsWithPrices = pricesDF.merge(cardsDfFinal,on=['name','set'])
cardsWithPrices = cardsWithPrices.set_index(['set','name']).sort_index()

In [51]:
print(cardsWithPrices.loc[(slice(None),'Liliana of the Veil'),'price'])
print('---------------------------------------------------------------------')
#Where's Liliana??
print(cardsWithPrices.sort_values(by='price', ascending=False)['price'].head(20))
print('---------------------------------------------------------------------')
cardsWithPrices.info()

set  name               
ISD  Liliana of the Veil    53.64
MM3  Liliana of the Veil    53.27
Name: price, dtype: object
---------------------------------------------------------------------
set  name                   
M11  Leyline of the Void        9.77
GPT  Leyline of the Void        9.56
MM3  Griselbrand                9.56
CHK  Through the Breach         9.41
MM2  Emrakul, the Aeons Torn    9.35
EMN  Grim Flayer                9.20
TSB  The Rack                   9.02
OGW  Thought-Knot Seer          8.94
ROE  Emrakul, the Aeons Torn    8.90
ZEN  Bloodghast                 8.83
AER  Glint-Sleeve Siphoner      8.63
TSP  Ancestral Vision           8.54
M12  Inferno Titan              8.05
M11  Inferno Titan              7.96
SOI  Nahiri, the Harbinger      7.93
MM3  Gifts Ungiven              7.90
MMA  Gifts Ungiven              7.88
MM3  Past in Flames             7.84
M11  Primeval Titan             7.82
CHK  Gifts Ungiven              7.81
Name: price, dtype: object
--------------