In [1]:
import pandas as pd

In [2]:
fga = pd.read_csv('NBA_22_23_FGA.csv', sep=';')
fga.head()

Unnamed: 0,PLAYER,PLAY TYPE,MADE,SHOT TYPE,BOXSCORE,VTM,HTM,Game Date,PERIOD,TIME REMAINING,SHOT DISTANCE (FT),TEAM
0,Marcus Smart,Driving Floating Bank Jump Shot,✔ Made Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,11:15,13,Boston Celtics
1,Jayson Tatum,Jump Shot,✔ Made Shot,3PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,10:46,23,Boston Celtics
2,Derrick White,Running Layup Shot,✖ Missed Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,10:04,1,Boston Celtics
3,Al Horford,Cutting Layup Shot,✖ Missed Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,09:53,2,Boston Celtics
4,Jayson Tatum,Running Layup Shot,✔ Made Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,09:42,0,Boston Celtics


In [3]:
## Tabela com o nome e o código de todos os time da NBA
cod_teams = pd.read_csv('cod_teams.csv', sep=';')
cod_teams.head()

Unnamed: 0,name,cod
0,Atlanta Hawks,ATL
1,Boston Celtics,BOS
2,Brooklyn Nets,BKN
3,Charlotte Hornets,CHA
4,Chicago Bulls,CHI


In [4]:
## Tabela com o nome, altura, peso e posição do draft de todos os jogadores da temporada 2022-2023 da NBA
info_players = pd.read_csv('players_info.csv', sep=',')
info_players = info_players.rename(columns={"Player": "PLAYER"})
info_players.head(10)

Unnamed: 0,PLAYER,Age,Height,Weight,DRAFT NUMBER
0,A.J. Lawson,22,6-6,179,Undrafted
1,AJ Green,23,6-5,190,Undrafted
2,AJ Griffin,19,6-6,220,16
3,Aaron Gordon,27,6-8,235,4
4,Aaron Holiday,26,6-0,185,23
5,Aaron Nesmith,23,6-5,215,14
6,Aaron Wiggins,24,6-5,190,55
7,Admiral Schofield,26,6-5,241,42
8,Al Horford,37,6-9,240,3
9,Alec Burks,31,6-6,214,12


In [5]:
# Juntando as informações dos arremessos com as informações dos jogadores
fga = fga.merge(info_players, on='PLAYER', how='inner')
fga.head()

Unnamed: 0,PLAYER,PLAY TYPE,MADE,SHOT TYPE,BOXSCORE,VTM,HTM,Game Date,PERIOD,TIME REMAINING,SHOT DISTANCE (FT),TEAM,Age,Height,Weight,DRAFT NUMBER
0,Marcus Smart,Driving Floating Bank Jump Shot,✔ Made Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,11:15,13,Boston Celtics,29,6-4,220,6
1,Marcus Smart,Jump Shot,✖ Missed Shot,3PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,06:42,22,Boston Celtics,29,6-4,220,6
2,Marcus Smart,Driving Floating Jump Shot,✖ Missed Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",1,03:25,6,Boston Celtics,29,6-4,220,6
3,Marcus Smart,Pullup Jump shot,✔ Made Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",2,07:33,9,Boston Celtics,29,6-4,220,6
4,Marcus Smart,Hook Shot,✖ Missed Shot,2PT Field Goal,PHI @ BOS,PHI,BOS,"Tuesday, October 18",2,06:33,6,Boston Celtics,29,6-4,220,6


In [6]:
## Alguns tratamentos que achamos prudentes:
## 1) Substituir "✔ Made Shot" por 1 e "✘ Missed Shot" por 0

fga['MADE'] = fga['MADE'].replace('✔ Made Shot', 1)
fga['MADE'] = fga['MADE'].replace('✖ Missed Shot', 0)


## 2) Substituir "2PT Field Goal" por 2 e "3PT Field Goal" por 3

fga['SHOT TYPE'] = fga['SHOT TYPE'].replace('2PT Field Goal', 2)
fga['SHOT TYPE'] = fga['SHOT TYPE'].replace('3PT Field Goal', 3)


## 3) Criar uma coluna para identificar se o time que arremessou é mandante ou visitante da partida

is_home = []
for i in range(len(fga)):
    if (cod_teams[cod_teams['name'] == fga['TEAM'][i]]["cod"].values[0]) == fga['HTM'][i]:
        is_home.append(1)
    else:
        is_home.append(0) 
fga['IS_HOME'] = is_home


## 4) Transformar a coluna "TIME REMAINING" em segundos.
#     Transformar a coluna "Height" em centímetros assumindo que 1 ft = 30.48 cm:
#     Transformar a coluna "Weight" em quilos assumindo que 1 kg = 2.20 libras

time_remaining = []
heights = []
weights = []
for i in range(len(fga)):
    time = fga['TIME REMAINING'][i].split(':')
    time_remaining.append(int(time[0])*60 + int(time[1]))
    height = float(fga['Height'][i].replace('-', '.')) * 30.48
    heights.append(height)
    weight = float(fga['Weight'][i]) / 2.20
    weights.append(weight)
    

fga['TIME REMAINING'] = time_remaining
fga['Height'] = heights
fga['Weight'] = weights


## 5) Retirar colunas descenessárias para o modelo

fga.drop('BOXSCORE', axis='columns', inplace=True)
fga.drop('VTM', axis='columns', inplace=True)
fga.drop('HTM', axis='columns', inplace=True)
fga.rename(columns={'Game\xa0Date': 'Game Date'}, inplace=True)
fga.drop('Game Date', axis='columns', inplace=True)
fga.head()


Unnamed: 0,PLAYER,PLAY TYPE,MADE,SHOT TYPE,PERIOD,TIME REMAINING,SHOT DISTANCE (FT),TEAM,Age,Height,Weight,DRAFT NUMBER,IS_HOME
0,Marcus Smart,Driving Floating Bank Jump Shot,1,2,1,675,13,Boston Celtics,29,195.072,100.0,6,1
1,Marcus Smart,Jump Shot,0,3,1,402,22,Boston Celtics,29,195.072,100.0,6,1
2,Marcus Smart,Driving Floating Jump Shot,0,2,1,205,6,Boston Celtics,29,195.072,100.0,6,1
3,Marcus Smart,Pullup Jump shot,1,2,2,453,9,Boston Celtics,29,195.072,100.0,6,1
4,Marcus Smart,Hook Shot,0,2,2,393,6,Boston Celtics,29,195.072,100.0,6,1


In [7]:
## Quais são os times que mais arriscam arremessos?

fga.value_counts('TEAM').head(5)

TEAM
Oklahoma City Thunder    7590
Atlanta Hawks            7574
Memphis Grizzlies        7551
Toronto Raptors          7489
Charlotte Hornets        7413
dtype: int64

In [8]:
## E os que mais possuem arremessos convertidos?

fga[fga["MADE"] == 1].value_counts('TEAM').head(5)

TEAM
Atlanta Hawks            3658
Memphis Grizzlies        3585
Denver Nuggets           3574
Sacramento Kings         3573
Golden State Warriors    3538
dtype: int64

In [9]:
## E os que menos possuem arremessos convertidos?

fga[fga["MADE"] == 0].value_counts('TEAM').head(5)

TEAM
Oklahoma City Thunder    4057
Toronto Raptors          4055
Charlotte Hornets        4028
Memphis Grizzlies        3966
Houston Rockets          3958
dtype: int64

In [10]:
## Quais são os times com maior taxa de acerto nos arremessos?

(fga[fga["MADE"] == 1].value_counts('TEAM') / fga.value_counts('TEAM')).sort_values(ascending=False).head(5).map('{:.2%}'.format)

TEAM
Denver Nuggets            50.42%
Sacramento Kings          49.41%
Minnesota Timberwolves    49.04%
Chicago Bulls             49.02%
Cleveland Cavaliers       48.80%
dtype: object

In [11]:
## Quais são os times com menor taxa de acerto nos arremessos?

(fga[fga["MADE"] == 0].value_counts('TEAM') / fga.value_counts('TEAM')).sort_values(ascending=False).head(5).map('{:.2%}'.format)

TEAM
Detroit Pistons      54.57%
Charlotte Hornets    54.34%
Houston Rockets      54.32%
Toronto Raptors      54.15%
Miami Heat           54.01%
dtype: object

In [17]:
print(f"Shape: {fga.shape}")
fga.head()

Shape: (209626, 13)


Unnamed: 0,PLAYER,PLAY TYPE,MADE,SHOT TYPE,PERIOD,TIME REMAINING,SHOT DISTANCE (FT),TEAM,Age,Height,Weight,DRAFT NUMBER,IS_HOME
0,Marcus Smart,Driving Floating Bank Jump Shot,1,2,1,675,13,Boston Celtics,29,195.072,100.0,6,1
1,Marcus Smart,Jump Shot,0,3,1,402,22,Boston Celtics,29,195.072,100.0,6,1
2,Marcus Smart,Driving Floating Jump Shot,0,2,1,205,6,Boston Celtics,29,195.072,100.0,6,1
3,Marcus Smart,Pullup Jump shot,1,2,2,453,9,Boston Celtics,29,195.072,100.0,6,1
4,Marcus Smart,Hook Shot,0,2,2,393,6,Boston Celtics,29,195.072,100.0,6,1


In [None]:
import sys
import pandas as pd
import numpy as np
import sklearn
import matplotlib
import keras

import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
import seaborn as sns

from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
from sklearn import model_selection
from keras import regularizers