In [1]:
import os
import numpy as np
import pandas as pd

## Read Data

In [2]:
dfs_dict = {}
for file in os.listdir('../processed_csv'):
    if file != 'draft_guide_data.csv':
        year = file.replace('.csv', '')
        df = pd.read_csv(f'../processed_csv/{file}')
        df['year'] = int(year)
        dfs_dict[year] = df

## Determine Tiers

In [3]:
dfs_dict['2020'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP                             
2     REALIZABLE ALL-STAR UPSIDE                    3
3     HIGH LEVERAGE STARTERS                        8
4     UPSIDE SWINGS, POTENTIAL STARTERS             6
5     ROTATION PLAYERS                             32
6     SECOND ROUND FLIERS AND PRIORITY TWO WAYS    11
Name: PLAYER, dtype: int64

In [4]:
dfs_dict['2021'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP          
1     Superstar Upside           1
2     All-Star Upside            3
3     High Leverage Starters     2
4     Potential Starters        16
5     Rotation Players          21
6     2nd Rd. Fliers            18
7     Undrafted Fliers          17
Name: PLAYER, dtype: int64

In [5]:
dfs_dict['2022'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP                              
2     All-Star Upside                                4
3     High-Leverage Starters                         5
4     Starters and Difference-Makers                12
5     Late First-Round, Guaranteed Contract Guys    28
6     Two-Way Contracts                             26
Name: PLAYER, dtype: int64

In [6]:
dfs_dict['2023'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP                      
1     Projectable All-NBA Upside             1
2     Projectable All-Star Upside            3
3     High-Leverage Starters                 4
4     Starter/All-Star Tool Swings           7
5     Rotation Players and Upside Swings    20
6     Second-Round Guarantee Swings          6
7     Priority Two-Ways                     11
8     Two-Ways, Stashes, Exhibit 10s        22
Vic   Victor Wembanyama                      1
Name: PLAYER, dtype: int64

- Him
- All-Star Upside
- High-Leverage Starters
- Upside Swings
- Rotation Players
- Second Round

In [7]:
global_tier_dict = {5: 'All-Star Upside',
                    4: 'High-Leverage Starters',
                    3: 'Upside Swings',
                    2: 'Rotation Players',
                    1: 'Second Round and Two-Ways'}

In [8]:
def transform_tier(df, tier_map,
                   global_tier_dict=global_tier_dict):
    df['TIER'] = df['TIER'].map(tier_map)
    df['TIER_DESCRIP'] = df['TIER'].map(global_tier_dict)
    return df

#### 2020

In [9]:
dfs_dict['2020'] = transform_tier(dfs_dict['2020'], {2: 5, 3: 4, 4: 3, 5: 2, 6: 1})
dfs_dict['2020'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP             
1     Second Round and Two-Ways    11
2     Rotation Players             32
3     Upside Swings                 6
4     High-Leverage Starters        8
5     All-Star Upside               3
Name: PLAYER, dtype: int64

#### 2021

In [10]:
dfs_dict['2021'] = transform_tier(dfs_dict['2021'], {1: 5, 2: 5, 3: 4, 4: 3, 5: 2, 6: 1, 7: 1})
dfs_dict['2021'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP             
1     Second Round and Two-Ways    35
2     Rotation Players             21
3     Upside Swings                16
4     High-Leverage Starters        2
5     All-Star Upside               4
Name: PLAYER, dtype: int64

#### 2022

In [11]:
dfs_dict['2022'] = transform_tier(dfs_dict['2022'], {2: 5, 3: 4, 4: 3, 5: 2, 6: 1})
dfs_dict['2022'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP             
1     Second Round and Two-Ways    26
2     Rotation Players             28
3     Upside Swings                12
4     High-Leverage Starters        5
5     All-Star Upside               4
Name: PLAYER, dtype: int64

#### 2023

In [12]:
dfs_dict['2023'] = transform_tier(dfs_dict['2023'], {'Vic': 5, '1': 5, '2': 5, '3': 4, 
                                                     '4': 3, '5': 2, '6': 1, '7': 1, '8': 1})
dfs_dict['2023'].groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP             
1     Second Round and Two-Ways    39
2     Rotation Players             20
3     Upside Swings                 7
4     High-Leverage Starters        4
5     All-Star Upside               5
Name: PLAYER, dtype: int64

## Combine Data

In [13]:
for year in dfs_dict:
    print(year, dfs_dict[year].columns)
    print()

2020 Index(['RANK', 'PLAYER', 'TEAM', 'POSITION', 'AGE', 'HEIGHT', 'WEIGHT',
       'WINGSPAN', 'TIER', 'STRENGTHS', 'WEAKNESSES', 'SUMMARY',
       'TIER_DESCRIP', 'year'],
      dtype='object')

2021 Index(['RANK', 'PLAYER', 'SCHOOL/TEAM', 'POS', 'AGE', 'HT', 'WING', 'TIER',
       'STRENGTHS', 'WEAKNESSES', 'SUMMARY', 'TIER_DESCRIP', 'year'],
      dtype='object')

2023 Index(['RANK', 'PLAYER', 'SCHOOL/TEAM', 'POS', 'AGE', 'HT', 'WING', 'TIER',
       'BACKGROUND', 'STRENGTHS', 'WEAKNESSES', 'SUMMARY', 'TIER_DESCRIP',
       'year'],
      dtype='object')

2022 Index(['RANK', 'PLAYER', 'SCHOOL/TEAM', 'POS', 'AGE', 'HT', 'WING', 'TIER',
       'STRENGTHS', 'WEAKNESSES', 'SUMMARY', 'TIER_DESCRIP', 'year'],
      dtype='object')



In [14]:
dfs_dict['2020'] = dfs_dict['2020'].rename(columns={'TEAM': 'SCHOOL/TEAM',
                                                    'POSITION': 'POS',
                                                    'HEIGHT': 'HT',
                                                    'WINGSPAN': 'WING'})

In [15]:
cols = ['RANK', 'PLAYER', 'SCHOOL/TEAM', 'POS', 'AGE', 'HT', 'WING', 'TIER',
       'STRENGTHS', 'WEAKNESSES', 'SUMMARY', 'TIER_DESCRIP', 'year']
df = pd.concat([dfs_dict[year][cols]
                for year in dfs_dict])\
       .reset_index().drop('index', axis=1)
df.shape

(288, 13)

In [16]:
df.groupby(['TIER', 'TIER_DESCRIP'])['PLAYER'].count()

TIER  TIER_DESCRIP             
1     Second Round and Two-Ways    111
2     Rotation Players             101
3     Upside Swings                 41
4     High-Leverage Starters        19
5     All-Star Upside               16
Name: PLAYER, dtype: int64

### Check Tiers

In [17]:
df[df['TIER'] == 5].PLAYER.values

array(['LaMelo Ball', 'James Wiseman', 'Anthony Edwards',
       'Cade Cunningham', 'Jalen Suggs', 'Evan Mobley', 'Jalen Green',
       'Victor Wembanyama', 'Scoot Henderson', 'Cam Whitmore',
       'Brandon Miller', 'Amen Thompson', 'Chet Holmgren',
       'Jabari Smith Jr.', 'Paolo Banchero', 'Jaden Ivey'], dtype=object)

In [18]:
df[df['TIER'] == 4].PLAYER.values

array(['Onyeka Okongwu', 'Isaac Okoro', 'Deni Avdija',
       'Tyrese Haliburton', 'Obi Toppin', 'Patrick Williams',
       'Killian Hayes', 'Devin Vassell', 'Jonathan Kuminga',
       'Scottie Barnes', 'Jarace Walker', 'Taylor Hendricks',
       'Anthony Black', 'Ausar Thompson', 'Bennedict Mathurin',
       'Jeremy Sochan', 'Dyson Daniels', 'Keegan Murray',
       'Shaedon Sharpe'], dtype=object)

In [19]:
df[df['TIER'] == 3].PLAYER.values

array(['Kira Lewis Jr.', 'R.J. Hampton', 'Tyrese Maxey',
       'Aleksej Pokusevski', 'Saddiq Bey', 'Aaron Nesmith', 'Moses Moody',
       'Alperen Sengun', 'James Bouknight', 'Davion Mitchell',
       'Usman Garuba', 'Josh Giddey', 'Chris Duarte', 'Franz Wagner',
       'Ziaire Williams', 'Trey Murphy III', 'Jared Butler',
       'Corey Kispert', 'Kai Jones', 'Jaden Springer', 'Keon Johnson',
       'Miles McBride', 'Dereck Lively II', 'Kobe Bufkin',
       'Bilal Coulibaly', 'Leonard Miller', 'Cason Wallace',
       'Jalen Hood-Schifino', 'Gradey Dick', 'Johnny Davis', 'AJ Griffin',
       'Jalen Duren', 'Tari Eason', 'TyTy Washington Jr.',
       'Mark Williams', 'Ousmane Dieng', 'Dalen Terry', 'Jalen Williams',
       'Ochai Agbaji', 'Jaden Hardy', 'Malaki Branham'], dtype=object)

## Check Length

In [20]:
temp = df.copy()
temp['STRENGTHS_LENGTH'] = temp['STRENGTHS'].apply(len)
temp['WEAKNESSES_LENGTH'] = temp['WEAKNESSES'].apply(len)
temp.groupby(['TIER', 'TIER_DESCRIP'])[['STRENGTHS_LENGTH', 'WEAKNESSES_LENGTH']].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,STRENGTHS_LENGTH,WEAKNESSES_LENGTH
TIER,TIER_DESCRIP,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Second Round and Two-Ways,2106.783784,1837.738739
2,Rotation Players,2865.188119,2334.009901
3,Upside Swings,3507.0,2347.04878
4,High-Leverage Starters,3725.105263,2299.894737
5,All-Star Upside,4676.0625,2639.6875


## Save Data

In [22]:
df.to_csv('../processed_csv/draft_guide_data.csv', index=False)