In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
import warnings
import re
import joblib
import string
import os

In [3]:
# Suppress FutureWarning and pandas' PerformanceWarning for the rest of the session.
for ignoredCategory in (FutureWarning, pd.errors.PerformanceWarning):
    warnings.simplefilter(action='ignore', category=ignoredCategory)

In [2]:
# Directory holding the input CSV files read by the loader functions.
# Paths are built by plain string concatenation, so the trailing slash is required.
DATA_DIR = './data/'
# Directory where the cached and final pickle files are read from / written to.
STATIC_DIR = '../../static/data/'

# Functions and Classes

In [5]:
def getSpecializations(profession):
    """Load the specialization bonus tables of one profession.

    For each of the stats 'skill', 'multicraft', 'resourcefulness' and
    'ingenuity' the file DATA_DIR + <profession lowercased> +
    '_specializations_<stat>.csv' is read. The first line is skipped; every
    other line is split on commas into: node name, maximum points, gain per
    point, followed by the bonus values granted at 0, 5, 10, ... points.

    Returns a dict  stat -> node -> {0: bonus, 5: bonus, ..., 'scaling': gainPerPoint}.
    """
    tables = {}

    for stat in ('skill', 'multicraft', 'resourcefulness', 'ingenuity'):
        nodeTable = {}
        tables[stat] = nodeTable
        csvPath = DATA_DIR+profession.lower()+'_specializations_'+stat+'.csv'

        with open(csvPath, 'r') as handle:
            rows = handle.readlines()[1:]

        for row in rows:
            fields = row.strip().split(',')
            node = fields[0]
            maxPoints = int(fields[1])
            gainPerPoint = int(fields[2])

            # one bonus value per 5-point threshold, up to and including maxPoints
            bonuses = [int(value) for value in fields[3:][:maxPoints//5+1]]
            thresholds = np.arange(start=0, stop=maxPoints+1, step=5)

            entry = dict(zip(thresholds, bonuses))
            entry['scaling'] = gainPerPoint
            nodeTable[node] = entry

    return tables

In [6]:
def getItems(profession):
    """Load the item -> specialization-node mapping of one profession.

    Reads DATA_DIR + profession + '_specializations_items.csv' (the name is
    used as given, without lowercasing), skipping the first line. Each
    remaining line is split on commas into: item ID, item name, then node names.

    Returns a dict  itemID (int) -> {'itemName': str, 'nodes': list of str}.
    """
    catalogue = {}

    with open(DATA_DIR+profession+'_specializations_items.csv', 'r') as handle:
        rows = handle.readlines()[1:]

    for row in rows:
        fields = row.strip().split(',')
        itemID = int(fields[0])

        entry = {'itemName': fields[1]}
        # the number of non-empty fields is used as the exclusive end of the node slice
        filledCount = sum(1 for field in fields if field != '')
        entry['nodes'] = fields[2:filledCount]

        catalogue[itemID] = entry

    return catalogue

In [7]:
def scrapeIcon(itemID):
    """Find the large icon URL of an item by scraping its Wowhead page.

    Parameters
    ----------
    itemID : int
        Item ID used to build the Wowhead URL and to locate the item's data
        entry inside the page.

    Returns
    -------
    dict
        {itemID: url} when an icon name was found and the icon URL answered
        with HTTP 200, otherwise {itemID: None}.
    """
    url = f'https://www.wowhead.com/item={itemID}'
    soup = BeautifulSoup(requests.get(url).text)

    # Find the item's data entry: the text preceded by   "{itemID}":{
    # and followed by   ,"screenshot"   that does not contain the symbol   }
    itemEntry = re.search(r'(?<="'+f'{itemID}'+r'":{)[^}]+(?=,"screenshot")', str(soup))
    if itemEntry is None:
        # page has no such entry; report a missing icon instead of raising,
        # so one bad page does not abort a whole batch of lookups
        return {itemID: None}

    # Within that entry, take the value preceded by   "icon":"   and followed by   "
    # made up only of a-z, A-Z, 0-9, _ and -
    iconName = re.search(r'(?<="icon":")[\w-]+(?=")', itemEntry.group())
    if iconName is None:
        return {itemID: None}

    site = 'https://wow.zamimg.com/images/wow/icons/large/'+iconName.group()+'.jpg'

    # only report the link when the image can actually be fetched
    if requests.get(site).status_code == 200:
        return {itemID: site}
    return {itemID: None}

In [8]:
def check_id(old_id):
    """Find the quality-tier-3 item ID that corresponds to ``old_id``.

    The item's Wowhead XML is fetched and its tooltip inspected:
      * tooltip mentions 'quality-tier1' -> candidates are old_id+2, then old_id-2
      * tooltip mentions 'quality-tier2' -> candidates are old_id+1, then old_id-1
      * otherwise (tier 3 or no tiers)   -> old_id is kept as is

    A candidate is accepted when its XML has the same name and its tooltip
    mentions 'quality-tier3'.

    Returns
    -------
    dict
        {old_id: new_id}; new_id is -1 when no candidate matched, which marks
        the item for manual checking.
    """
    def fetchSoup(item_id):
        # download and parse the XML description of one item
        html = requests.get(f'https://www.wowhead.com/item={item_id}?xml').text
        return BeautifulSoup(html, features='xml')

    soup = fetchSoup(old_id)
    name = soup.find('name').text
    text = soup.find('htmlTooltip').text

    #check for quality tier information
    if 'quality-tier1' in text:
        offset = 2
    elif 'quality-tier2' in text:
        offset = 1
    else: #either it's tier 3 or it doesn't have tiers, in which case use the old_id
        return {old_id: old_id}

    #wasn't a tier 3 item, so check the calculated ids for a tier 3 item of the same name
    for candidate in (old_id+offset, old_id-offset):
        try:
            candidateSoup = fetchSoup(candidate)
            sameName = candidateSoup.find('name').text == name
            isTier3 = 'quality-tier3' in candidateSoup.find('htmlTooltip').text
        except Exception:
            # request failed or the page lacks the expected tags: try the next candidate
            # (Exception rather than a bare except, so Ctrl-C still interrupts the scrape)
            continue
        if sameName and isTier3:
            return {old_id: candidate}

    #nothing matched: return -1 for manual checking
    return {old_id: -1}

In [9]:
def outcomeQuality(skill, difficulty, tag):
    """Return the number of quality thresholds that ``skill`` reaches.

    Tags whose lowercase form starts with 'gear' use the thresholds
    1, 20%, 50%, 80% and 100% of ``difficulty`` (result 0-5); every other tag
    uses 1, 50% and 100% of ``difficulty`` (result 0-3).
    """
    fractions = (0.2, 0.5, 0.8) if tag.lower().startswith('gear') else (0.5,)
    thresholds = np.array([1] + [fraction*difficulty for fraction in fractions] + [difficulty])
    return np.sum(skill >= thresholds)

In [10]:
def getProfession(file, name):
    """Deprecated loader: build a Profession from a recipe CSV without stat adjustments.

    Kept for reference only. A later definition of the same name in this file
    is the one in effect once both cells have run.

    The first line of ``file`` supplies the headers of the first 14 columns;
    any further columns hold reagent ID / count pairs up to the first empty field.
    """
    prof = Profession(name)

    with open(file, 'r', encoding='utf-8-sig') as handle:
        lines = handle.readlines()

    reagentStart = 14 #column number where reagents start, since the reagent columns have no headers
    headers = lines[0].strip().split(',')[:reagentStart]

    def asFlag(info, key):
        # the CSV stores booleans as text; anything that title-cases to 'True' counts as true
        return info.get(key).title()=='True'

    for line in lines[1:]:
        fields = line.strip().split(',')

        #the first empty field marks where the input values stop; None slices to the end
        reagentEnd = fields.index('') if '' in fields else None
        reagentValues = np.array(fields[reagentStart:reagentEnd]).astype(int)
        reagents = dict(zip(reagentValues[::2], reagentValues[1::2]))

        #can't call float('np.nan'), so the placeholder text is swapped for the value here
        itemInfo = {header: (np.nan if value == 'np.nan' else value)
                    for header, value in zip(headers, fields[:reagentStart])}

        prof.add(itemName = itemInfo.get('itemName'),
                 itemID = itemInfo.get('itemID'),
                 reagents = reagents,
                 crafterName = itemInfo.get('crafterName'),
                 tag = itemInfo.get('tag'),
                 difficulty = itemInfo.get('difficulty'),
                 multicraft = itemInfo.get('multicraft'),
                 quantity = itemInfo.get('quantity'),
                 skill = itemInfo.get('skill'),
                 rarity = itemInfo.get('rarity'),
                 hasReagentQualities = asFlag(itemInfo, 'hasReagentQualities'),
                 hasEmbellishmentSlot = asFlag(itemInfo, 'hasEmbellishmentSlot'),
                 hasMissiveSlot = asFlag(itemInfo, 'hasMissiveSlot'),
                 hasSafetyComponent = asFlag(itemInfo, 'hasSafetyComponent'),
                 hasCrestSlot = asFlag(itemInfo, 'hasCrestSlot'))

    return prof

In [11]:
def getProfession(file, name):
    """Build a Profession from a recipe CSV, adding base and specialization stats.

    Parameters
    ----------
    file : str
        Path of the recipe CSV. It is expected to start with DATA_DIR and end
        in a 4-character extension; a trailing digit in the file name (e.g.
        'alchemy2') is split off to select the matching base-stat row and the
        '<profession>_knowledge<digit>.csv' file.
    name : str
        Profession name. For every name other than 'cooking' (case-insensitive)
        the specialization tables are applied as well.

    Returns
    -------
    Profession
        One entry per data row. ``skill`` is the CSV value plus the base
        'level' and 'skill' stats (plus specialization skill); ``multicraft``
        is the summed stat divided by 33 and rounded to one decimal.

    Relies on the module-level ``baseStats`` frame being loaded before the call.
    """
    prof = Profession(name)

    baseFile = file[len(DATA_DIR):-4]
    if baseFile[-1] in string.digits:
        num = baseFile[-1]
        baseFile = baseFile[:-1]
    else:
        num = ''

    with open(file, 'r', encoding='utf-8-sig') as handle:
        data = handle.readlines()

    reagentStart = 14 #column number where reagents start, since the reagent columns have no headers
    headers = data[0].strip().split(',')[:reagentStart] #no headers for reagents
    rows = data[1:]

    # Everything below is identical for every row of the file, so it is read once
    # up front instead of once per row. Skipped for header-only files so that no
    # additional files are touched when there is nothing to process.
    usesSpecializations = name.lower() != 'cooking'
    if rows:
        statsMask = baseStats['profession']==baseFile+num
        baseLevel = float(baseStats.loc[statsMask, 'level'].iloc[0])
        baseSkill = float(baseStats.loc[statsMask, 'skill'].iloc[0])
        baseMulticraft = float(baseStats.loc[statsMask, 'multicraft'].iloc[0])

        if usesSpecializations:
            specializationInfo = getSpecializations(name.lower())
            specializationItems = getItems(name.lower())
            knowledgePoints = pd.read_csv(DATA_DIR+baseFile+'_knowledge'+num+'.csv')

    for line in rows:
        fields = line.strip().split(',')

        #the first empty field marks where the input values stop; None slices to the end
        reagentEnd = fields.index('') if '' in fields else None
        reagentValues = np.array(fields[reagentStart:reagentEnd]).astype(int)
        reagents = dict(zip(reagentValues[::2], reagentValues[1::2]))

        #can't call float('np.nan'), so the placeholder text is swapped for the value here
        itemInfo = {header: (np.nan if value == 'np.nan' else value)
                    for header, value in zip(headers, fields[:reagentStart])}

        itemID = int(itemInfo.get('itemID'))

        skill = float(itemInfo.get('skill'))
        skill += baseLevel
        skill += baseSkill

        multicraft = float(itemInfo.get('multicraft'))
        multicraft += baseMulticraft

        if usesSpecializations:
            nodes = specializationItems[itemID]['nodes']
            skill += getStat(specializationInfo['skill'], knowledgePoints, nodes)
            multicraft += getStat(specializationInfo['multicraft'], knowledgePoints, nodes)

        multicraft = np.round(multicraft/33, 1) #convert from stat to percent

        prof.add(itemName = itemInfo.get('itemName'),
                 itemID = itemID,
                 reagents = reagents,
                 crafterName = itemInfo.get('crafterName'),
                 tag = itemInfo.get('tag'),
                 difficulty = itemInfo.get('difficulty'),
                 multicraft = multicraft,
                 quantity = itemInfo.get('quantity'),
                 skill = skill,
                 rarity = itemInfo.get('rarity'),
                 hasReagentQualities = (itemInfo.get('hasReagentQualities').title()=='True'),
                 hasEmbellishmentSlot = (itemInfo.get('hasEmbellishmentSlot').title()=='True'),
                 hasMissiveSlot = (itemInfo.get('hasMissiveSlot').title()=='True'),
                 hasSafetyComponent = (itemInfo.get('hasSafetyComponent').title()=='True'),
                 hasCrestSlot = (itemInfo.get('hasCrestSlot').title()=='True'))

    return prof

In [12]:
def getStat(nodeValues, knowledgePoints, nodes):
    """Sum the specialization bonus that a set of nodes grants for one stat.

    Parameters
    ----------
    nodeValues : dict
        node -> {0: bonus, 5: bonus, ..., 'scaling': gain per point}.
    knowledgePoints : pandas.DataFrame
        Needs the columns 'node' and 'points'; -1 points marks a node that is
        not unlocked.
    nodes : iterable of str
        Nodes that apply to the item.

    Returns
    -------
    number
        For every unlocked node: the bonuses at each 5-point threshold reached
        (0, 5, ... up to the invested points) plus scaling * invested points.

    Raises
    ------
    KeyError
        If a node has no row in ``knowledgePoints``, or ``nodeValues`` lacks
        the node or one of the thresholds it has reached.
    """
    statValue = 0

    for node in nodes:
        invested = knowledgePoints.loc[knowledgePoints['node']==node, 'points']
        if len(invested) == 0:
            raise KeyError(f'node {node!r} has no row in the knowledge point table')
        knowledge = invested.iloc[0]

        # skip if node not unlocked
        if knowledge == -1:
            continue

        try:
            nodeGains = nodeValues[node]
            for threshold in np.arange(start=0, stop=knowledge+1, step=5):
                statValue += nodeGains[threshold]
        except KeyError as err:
            raise KeyError(f'no specialization value for node {node!r} '
                           f'with {knowledge} points (missing key {err})') from err
        statValue += nodeGains['scaling']*knowledge

    return statValue

In [13]:
def updateReagents(reagents: dict, replacementIDs: dict):
    """Return a copy of ``reagents`` with every ID that has a replacement swapped for it.

    IDs without an entry in ``replacementIDs`` are kept unchanged. When two
    IDs end up on the same key, the count of the later one wins.
    """
    remapped = {}
    for reagentID, count in reagents.items():
        remapped[replacementIDs.get(reagentID, reagentID)] = count
    return remapped

In [14]:
class Profession:
    """Row collector for the recipes of one profession.

    Rows are added one at a time with ``add`` and turned into a typed
    DataFrame with ``get_table``.
    """
    __all_data = None  # list of rows, in the column order used by get_table
    __name = None      # profession name written into every row

    def __init__(self, profession):
        self.__all_data = list()
        self.__name = profession

    def add(self, itemName, itemID, reagents, crafterName, tag, difficulty, multicraft, quantity, skill,
            rarity, hasReagentQualities, hasEmbellishmentSlot, hasMissiveSlot, hasSafetyComponent, hasCrestSlot):
        """Append one recipe row; the icon column is left as None."""
        self.__all_data.append([self.__name, crafterName, itemID, itemName, None, reagents, tag, rarity,
                                difficulty, skill, quantity, multicraft, hasReagentQualities,
                                hasEmbellishmentSlot, hasMissiveSlot, hasSafetyComponent, hasCrestSlot])

    def get_table(self):
        """Return all rows as a DataFrame with the dtypes listed below."""
        # one dtype per column, kept together so the two cannot drift apart
        schema = {'profession': 'string', 'character': 'string', 'itemID': 'int32', 'item': 'string',
                  'icon': 'string', 'reagents': 'object', 'tag': 'string', 'rarity': 'string',
                  'difficulty': float, 'skill1': float, 'baseQuantity': 'string',
                  'multicraftPercent': float, 'hasReagentQualities': bool,
                  'hasEmbellishmentSlot': bool, 'hasMissiveSlot': bool, 'hasSafetyComponent': bool,
                  'hasCrestSlot': bool}
        df = pd.DataFrame(columns=list(schema), data=self.__all_data)
        return df.astype(schema)

    def set_table(self, df):
        """Replace the stored rows with the rows of ``df`` (columns in get_table order)."""
        # stored as a list of rows so that add() can still append afterwards
        self.__all_data = df.to_numpy().tolist()

# Initial DataFrames

In [15]:
# Empty placeholder frames with their intended column layouts.
# The *_dtypes lists are only declared here; nothing in this cell applies them.
# `items` is rebuilt from all_data in the "Items DataFrame" section further down.
items_columns = ['itemID', 'item', 'icon', 'tag', 'rarity']
items_dtypes = ['int32', 'string', 'string', 'string', 'string']
items = pd.DataFrame(columns=items_columns)

# per-profession recipe attributes
professions_columns = ['profession', 'itemID', 'reagents', 'hasReagentQualities', 'hasEmbellishmentSlot',
                       'hasMissiveSlot', 'hasSafetyComponent', 'hasCrestSlot']
professions_dtypes = ['string', 'int32', dict, bool, bool, bool, bool, bool]
professions = pd.DataFrame(columns=professions_columns)

# per-character crafting stats
crafting_columns = ['itemID', 'difficulty', 'character', 'skill1', 'base_quantity', 'multicraft_percent']
crafting_dtypes = ['int32', 'int16', 'string', float, 'string', float]
crafting = pd.DataFrame(columns=crafting_columns)

In [16]:
# Base stats table; getProfession looks rows up by the 'profession' column and
# reads the 'level', 'skill' and 'multicraft' columns, so it must be loaded first.
baseStats = pd.read_csv(DATA_DIR+'base_stats.csv')

In [17]:
def _loadProfession(stem, name):
    """Load DATA_DIR/<stem>.csv as the profession ``name``."""
    return getProfession(DATA_DIR+stem+'.csv', name)

# A stem ending in '2' is the second recipe file of the same profession.
alchemy = _loadProfession('alchemy', 'Alchemy')
alchemy2 = _loadProfession('alchemy2', 'Alchemy')
blacksmithing = _loadProfession('blacksmithing', 'Blacksmithing')
blacksmithing2 = _loadProfession('blacksmithing2', 'Blacksmithing')
cooking = _loadProfession('cooking', 'Cooking')
enchanting = _loadProfession('enchanting', 'Enchanting')
enchanting2 = _loadProfession('enchanting2', 'Enchanting')
engineering = _loadProfession('engineering', 'Engineering')
inscription = _loadProfession('inscription', 'Inscription')
inscription2 = _loadProfession('inscription2', 'Inscription')
jewelcrafting = _loadProfession('jewelcrafting', 'Jewelcrafting')
jewelcrafting2 = _loadProfession('jewelcrafting2', 'Jewelcrafting')
leatherworking = _loadProfession('leatherworking', 'Leatherworking')
leatherworking2 = _loadProfession('leatherworking2', 'Leatherworking')
tailoring = _loadProfession('tailoring', 'Tailoring')
tailoring2 = _loadProfession('tailoring2', 'Tailoring')

# DataFrame Merging

In [18]:
#concatenate the two tables of each profession
#sort by itemID and skill (descending) so the entry with the higher skill is on top
#break same-skill ties by character name such that the primary crafter is at the top
#keep the first entry for each itemID (i.e., the highest skill entry)
sortCols = ['itemID', 'skill1', 'character']
sortVals = [True, False] #ascending flags for itemID and skill1; the flag for character is per profession
nameAscending = {'alchemy': False,
                 'blacksmithing': False,
                 'enchanting': True,
                 'inscription': False,
                 'jewelcrafting': True,
                 'leatherworking': True,
                 'tailoring': False}

professionPairs = [(alchemy, alchemy2, 'alchemy'), (blacksmithing, blacksmithing2, 'blacksmithing'),
                   (enchanting, enchanting2, 'enchanting'), (inscription, inscription2, 'inscription'),
                   (jewelcrafting, jewelcrafting2, 'jewelcrafting'),
                   (leatherworking, leatherworking2, 'leatherworking'), (tailoring, tailoring2, 'tailoring')]

#collect the per-profession results and concatenate once, instead of growing all_data
#from an empty DataFrame (concatenating with an empty frame is deprecated pandas behaviour)
bestEntries = []
for df1, df2, name in professionPairs:
    df = pd.concat((df1.get_table(), df2.get_table()), ignore_index=True)
    df = df.sort_values(by=sortCols, ascending=sortVals+[nameAscending[name]])
    df = df.groupby('itemID', as_index=False).head(1)
    bestEntries.append(df)

all_data = pd.concat(bestEntries, ignore_index=True)

In [19]:
# Cooking and engineering have a single recipe file each, so their tables are appended as they are.
all_data = pd.concat(
    (all_data, cooking.get_table(), engineering.get_table()),
    ignore_index=True,
).reset_index(drop=True)

# Manual Adjustments

In [20]:
# Item names that need correcting, keyed by the text as loaded (quoting characters still present).
fixes = {'"Magically ""Infinite"" Messenger"': 'Magically "Infinite" Messenger'}

def _fixItemName(itemName):
    """Return the corrected name when one is listed in ``fixes``, else the name unchanged."""
    return fixes.get(itemName, itemName)

all_data['item'] = all_data['item'].apply(_fixItemName)

# Items DataFrame

In [21]:
#single dataframe of all items, including those listed in reagents
columns = ['itemID', 'item', 'icon']
items = all_data.loc[:, columns]
items = items.drop_duplicates()

#IDs already present, kept in a set so the check per reagent does not rescan the frame
knownIDs = set(items['itemID'].tolist())
reagentRows = []

for reagents in tqdm(all_data['reagents'], total=len(all_data)):
    for reagent in reagents.keys():
        if reagent in knownIDs:
            continue
        #reagent is not a crafted item in the tables: look its name up on wowhead
        url = f'https://www.wowhead.com/item={reagent}?xml'
        html = requests.get(url).text
        soup = BeautifulSoup(html, features='xml')
        reagentRows.append([reagent, soup.find('name').text, None])
        knownIDs.add(reagent)

#append all new rows in one go
if reagentRows:
    #index 0 on every new row matches what appending one single-row frame at a time produced
    newItems = pd.DataFrame(columns=columns, data=reagentRows, index=[0]*len(reagentRows))
    items = pd.concat((items, newItems))

100%|██████████| 608/608 [00:50<00:00, 12.09it/s]


# Item Icons

In [22]:
# "LOAD" reads the cached icon links when the cache file exists;
# "UPDATE" (or a missing cache file) scrapes every item again and rewrites the cache.
method = "LOAD"
icon_file = STATIC_DIR+'icons.pkl'

if not os.path.isfile(icon_file) or method == "UPDATE":
    num_cores = joblib.cpu_count()
    all_jobs = [joblib.delayed(scrapeIcon)(itemID) for itemID in items['itemID'].values]
    results = joblib.Parallel(n_jobs=num_cores, verbose=10)(all_jobs)
    #merge the single-entry dicts returned by scrapeIcon
    icon_links = {int(k):v for d in results for k,v in d.items()}
    icons_df = pd.DataFrame()
    icons_df['itemID'] = icon_links.keys()
    icons_df['link'] = icon_links.values()
    icons_df.to_pickle(icon_file)
elif method == "LOAD":
    #the pickle is this notebook's own cache; do not point icon_file at untrusted files
    icon_links = pd.read_pickle(icon_file)
    icon_links = dict(zip(icon_links['itemID'].values, icon_links['link']))
else:
    #previously an unknown method did nothing and left icon_links undefined or stale
    raise ValueError(f'method must be "LOAD" or "UPDATE", got {method!r}')

In [23]:
#fill the icon column of both dataframes from the itemID -> link lookup
for frame in (all_data, items):
    frame['icon'] = frame['itemID'].map(icon_links)

In [24]:
#every row of all_data must have an icon; df holds the offending rows if any
missingIcon = all_data['icon'].isna()
df = all_data.loc[missingIcon, ['itemID', 'icon']]
assert df.empty

In [25]:
# Display the itemID -> icon URL lookup for a visual check (prints the whole dict).
icon_links

{210816: 'https://wow.zamimg.com/images/wow/icons/large/inv_10_alchemy_alchemystone_color4.jpg',
 211802: 'https://wow.zamimg.com/images/wow/icons/large/inv_misc_food_legion_gooamberpink_multi.jpg',
 211803: 'https://wow.zamimg.com/images/wow/icons/large/inv_misc_food_legion_goooil_multi.jpg',
 211804: 'https://wow.zamimg.com/images/wow/icons/large/inv_misc_food_legion_gooslime_multi.jpg',
 211878: 'https://wow.zamimg.com/images/wow/icons/large/inv_flask_red.jpg',
 212239: 'https://wow.zamimg.com/images/wow/icons/large/inv_flask_blue.jpg',
 212242: 'https://wow.zamimg.com/images/wow/icons/large/inv_alchemy_elixir_06.jpg',
 212245: 'https://wow.zamimg.com/images/wow/icons/large/inv_flask_green.jpg',
 212248: 'https://wow.zamimg.com/images/wow/icons/large/inv_potion_139.jpg',
 212251: 'https://wow.zamimg.com/images/wow/icons/large/inv_potion_62.jpg',
 212254: 'https://wow.zamimg.com/images/wow/icons/large/inv_summerfest_firepotion.jpg',
 212257: 'https://wow.zamimg.com/images/wow/icons/l

# Update ItemIDs to be Rank 3

In [26]:
# "LOAD" reads the cached old -> rank 3 ID mapping when the cache file exists;
# "UPDATE" (or a missing cache file) checks every item again and rewrites the cache.
method = "LOAD"
itemIDfile = STATIC_DIR+'itemIDUpdates.pkl'

if not os.path.isfile(itemIDfile) or method == 'UPDATE':
    #determined here as well: the icon cell only sets num_cores when it scrapes,
    #so it is undefined whenever the icons were loaded from their cache
    num_cores = joblib.cpu_count()
    old_ids = items['itemID'].astype(int).to_numpy()
    all_jobs = [joblib.delayed(check_id)(old_id) for old_id in old_ids]
    results = joblib.Parallel(n_jobs=num_cores, verbose=10)(all_jobs)
    #merge the single-entry dicts returned by check_id
    new_ids = {int(k):int(v) for d in results for k,v in d.items()}
    new_ids_df = pd.DataFrame()
    new_ids_df['oldID'] = new_ids.keys()
    new_ids_df['newID'] = new_ids.values()
    new_ids_df.to_pickle(itemIDfile)
elif method == 'LOAD':
    #the pickle is this notebook's own cache; do not point itemIDfile at untrusted files
    new_ids = pd.read_pickle(itemIDfile)
    new_ids = dict(zip(new_ids['oldID'].values, new_ids['newID'].values))
else:
    #previously an unknown method did nothing and left new_ids undefined or stale
    raise ValueError(f'method must be "LOAD" or "UPDATE", got {method!r}')

In [27]:
#manual ID fixes, applied only to IDs that are already keys of new_ids
manualTargets = {219952: 219955, #refulgent crystal
                 212670: 212673} #thunderous hide

for old_id in new_ids.keys():
    if old_id in range(224300, 224324): #gleeful glamours
        new_ids[old_id] = old_id+48
    elif old_id in manualTargets:
        new_ids[old_id] = manualTargets[old_id]

In [28]:
#ensure all items with a rank 3 are listed at rank 3 (-1 marks an ID that still needs manual checking)
assert all(new_id != -1 for new_id in new_ids.values())

In [29]:
#update itemIDs in both dataframes
def _toRank3ID(itemID):
    """Return the rank 3 ID recorded for ``itemID``, or ``itemID`` itself when none is recorded."""
    #same fallback as updateReagents; a plain .map(new_ids) would turn unknown IDs
    #into NaN and the whole column into floats
    return new_ids.get(itemID, itemID)

all_data['itemID'] = all_data['itemID'].map(_toRank3ID)
items['itemID'] = items['itemID'].map(_toRank3ID)

In [30]:
#replace every reagent id that is not at rank 3 with the rank 3 id that was found
all_data['reagents'] = all_data['reagents'].apply(lambda reagents: updateReagents(reagents, new_ids))

# Add Difficulties

In [31]:
#all mats rank 2
all_data['skill2'] = all_data['skill1']+all_data['difficulty']*0.2

#all mats rank 3
all_data['skill3'] = all_data['skill1']+all_data['difficulty']*0.4

#extra difficulty added by each optional reagent
extraDiff = {'safetyComponent':10,
             'missive':5,
             'embellishment':5,
             'weatheredCrest':100,
             'runedCrest':10,
             'gildedCrest':20,
             'combatant':0,
             'aspirant':50,
             'gladiator':150}

#fragment of the column name used for each extraDiff key
difficultyLabels = {'safetyComponent':'safetycomponent',
                    'missive':'missive',
                    'embellishment':'embellishment',
                    'weatheredCrest':'weathered',
                    'runedCrest':'runed',
                    'gildedCrest':'gilded',
                    'combatant':'combatant',
                    'aspirant':'aspirant',
                    'gladiator':'gladiator'}

#optional reagent combinations for crafted gear
#safetycomponent+embellishment not currently possible
craftedOptions = [('safetyComponent',), ('missive',), ('embellishment',),
                  ('safetyComponent', 'missive'), ('missive', 'embellishment')]
#optional reagent combinations generated for the combatant/aspirant/gladiator columns
pvpOptions = [('missive',), ('embellishment',), ('missive', 'embellishment')]

#every combination that gets a difficulty column, in column order
difficultyCombos = list(craftedOptions)
for crest in ('weatheredCrest', 'runedCrest', 'gildedCrest'):
    difficultyCombos += [(crest,)] + [(crest,)+option for option in craftedOptions]
for pvpRank in ('combatant', 'aspirant', 'gladiator'):
    difficultyCombos += [(pvpRank,)] + [(pvpRank,)+option for option in pvpOptions]

#one column per combination: base difficulty plus the extra of every part
#(this also gives difficulty_safetycomponent_missive its two extras, which the
# hand-written assignment had left out)
for combo in difficultyCombos:
    column = 'difficulty_' + '_'.join(difficultyLabels[key] for key in combo)
    adjusted = all_data['difficulty']
    for key in combo:
        adjusted = adjusted + extraDiff[key]
    all_data[column] = adjusted

In [32]:
#suffixes of the difficulty columns; '' is the unmodified difficulty
modifiers = [
    '',
    '_safetycomponent',
    '_missive',
    '_embellishment',
    '_safetycomponent_missive',
    '_missive_embellishment',
    '_weathered',
    '_weathered_safetycomponent',
    '_weathered_safetycomponent_missive',
    '_weathered_missive',
    '_weathered_embellishment',
    '_weathered_missive_embellishment',
    '_runed',
    '_runed_safetycomponent',
    '_runed_safetycomponent_missive',
    '_runed_missive',
    '_runed_embellishment',
    '_runed_missive_embellishment',
    '_gilded',
    '_gilded_safetycomponent',
    '_gilded_missive',
    '_gilded_safetycomponent_missive',
    '_gilded_embellishment',
    '_gilded_missive_embellishment',
    '_combatant',
    '_combatant_missive',
    '_combatant_embellishment',
    '_combatant_missive_embellishment',
    '_aspirant',
    '_aspirant_missive',
    '_aspirant_embellishment',
    '_aspirant_missive_embellishment',
    '_gladiator',
    '_gladiator_missive',
    '_gladiator_embellishment',
    '_gladiator_missive_embellishment',
]

#for every difficulty variant, the outcome quality with rank 1, 2 and 3 materials
#(skill1, skill2 and skill3 respectively)
for modifier in modifiers:
    difficultyCol = 'difficulty'+modifier
    for matRank in (1, 2, 3):
        skillCol = f'skill{matRank}'
        all_data[f'rank{matRank}mats_outcome{modifier}'] = all_data.apply(
            lambda row: outcomeQuality(row[skillCol], row[difficultyCol], row['tag']),
            axis=1)

# Proper Case tag field

In [33]:
def _properCaseTag(tag):
    """Title-case a tag; 'gear (pvp)' gets its own spelling because title() would give 'Gear (Pvp)'."""
    if tag == "gear (pvp)":
        return "Gear (PvP)"
    return tag.title()

all_data['tag'] = all_data['tag'].apply(_properCaseTag)

# Remove pd.NA and Sorting

In [34]:
all_data = all_data.reset_index(drop=True)

#missing characters (pd.NA or the text 'None') are stored as None
all_data['character'] = all_data['character'].replace({pd.NA:None, 'None':None})
all_data = all_data.sort_values(by=['profession', 'item'], ascending=[True, True])

#sanity check on the number of distinct characters (a missing value counts as one)
#the comparison used to sit inside len(), which made the check always pass
characterCount = len(all_data['character'].unique())
assert characterCount in (8, 9), f'expected 8 or 9 distinct characters, found {characterCount}'

# File Saving

In [35]:
# Persist the item lookup table (itemID, item, icon) to STATIC_DIR.
items.to_pickle(STATIC_DIR+'items_TWW.pkl')

In [36]:
# Persist the full recipe table, including the derived difficulty and outcome columns, to STATIC_DIR.
all_data.to_pickle(STATIC_DIR+'data_TWW.pkl')

In [3]:
# Reload the two saved tables from STATIC_DIR, replacing the in-memory versions.
# NOTE(review): presumably lets the cells below run from the saved files without
# repeating the pipeline above - confirm. Needs the imports and STATIC_DIR cells to have run.
items = pd.read_pickle(STATIC_DIR+'items_TWW.pkl')
all_data = pd.read_pickle(STATIC_DIR+'data_TWW.pkl')

# Issue Testing