# Player Teams Dataset Statistics

## Section: Import Data
This section imports the required libraries and loads the players_teams dataset.
The process involves:
- Import the pandas, matplotlib.pyplot, and seaborn libraries
- import players_teams dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the players_teams CSV into a DataFrame; fields are comma-separated.
data = pd.read_csv('basketballPlayoffs/players_teams.csv', sep=',')

## Section: Explore Dataset
This section explores the dataset.
The process involves:
- Find null values
- Calculate summary statistics (percentiles, mean, and standard deviation) of the numerical values

In [None]:
# Count the missing entries in each column of the dataset.
data.isna().sum()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default
# pandas reports these for the numeric columns of the dataset.
data.describe()

## Section: Statistics
This section focuses on extraction of important statistics to analyse the dataset.
The process involves:
- Make a graphic of the teams with most points
- Make a graphic of the teams with the most postseason games played
- Make a graphic of the teams with most rebounds

### Graphic Teams with the Most Points

In [None]:
# Total points per team, ordered from the highest total to the lowest.
team_points = (
    data.groupby('tmID')['points']
    .sum()
    .sort_values(ascending=False)
)

# One bar per team on a wide canvas; team labels are kept horizontal.
fig, ax = plt.subplots(figsize=(12, 6))
team_points.plot.bar(ax=ax, rot=0)
ax.set_xlabel('Team')
ax.set_ylabel('Total Points')
ax.set_title('Teams with the most points')
plt.show()


### Graphic Teams with the Most Postseason Games Played

In [None]:
# Total of the PostGP column per team, ordered from highest to lowest.
team_postGP = (
    data.groupby('tmID')['PostGP']
    .sum()
    .sort_values(ascending=False)
)

# One bar per team on a wide canvas; team labels are kept horizontal.
fig, ax = plt.subplots(figsize=(12, 6))
team_postGP.plot.bar(ax=ax, rot=0)
ax.set_xlabel('Team')
ax.set_ylabel('Total Post Games Played')
ax.set_title('Teams with the most post games played')
plt.show()

### Graphic Teams with the Most Rebounds

In [None]:
# Total rebounds per team, ordered from the highest total to the lowest.
team_rebounds = (
    data.groupby('tmID')['rebounds']
    .sum()
    .sort_values(ascending=False)
)

# One bar per team on a wide canvas; team labels are kept horizontal.
fig, ax = plt.subplots(figsize=(12, 6))
team_rebounds.plot.bar(ax=ax, rot=0)
ax.set_xlabel('Team')
ax.set_ylabel('Total Rebounds')
ax.set_title('Teams with the most rebounds')
plt.show()

## Section: Correlation Analysis
This section analyses the correlations between the team-level statistics in the dataset.
The process involves:
- Group stats by team
- Make a correlation matrix

In [None]:
# Statistics that are totalled for every team, in output column order.
stat_columns = [
    'GP', 'GS', 'minutes', 'points',
    'oRebounds', 'dRebounds', 'rebounds',
    'assists', 'steals', 'blocks', 'turnovers', 'PF',
    'fgAttempted', 'fgMade',
    'ftAttempted', 'ftMade',
    'threeAttempted', 'threeMade',
    'dq', 'PostGP',
]

# Group the rows by team identifier. No intermediate preview is computed
# here: only the last expression of a cell is rendered, so a mid-cell
# `.head()` call would build a frame and immediately discard it.
grouped_data = data.groupby('tmID')

# Per-team totals: every listed column is summed across the team's rows.
team_stats = grouped_data.agg(dict.fromkeys(stat_columns, 'sum'))

# Last expression of the cell, so the notebook displays the table.
team_stats

In [None]:
# Pairwise correlation between all of the aggregated team statistics.
corr_columns = [
    "GP", "GS", "minutes", "points",
    "oRebounds", "dRebounds", "rebounds",
    "assists", "steals", "blocks", "turnovers", "PF",
    "fgAttempted", "fgMade",
    "ftAttempted", "ftMade",
    "threeAttempted", "threeMade",
    "dq", "PostGP",
]
correlation = team_stats[corr_columns].corr()

print(correlation)

# Draw the matrix as an annotated heatmap (two-decimal cell labels).
fig, ax = plt.subplots(figsize=(12, 10))  # Adjust the figure size as needed
sns.heatmap(
    correlation,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    linewidths=.5,
    ax=ax,
)

# Title, plus slanted x-axis labels for better readability.
ax.set_title('Correlation Heatmap')
plt.xticks(rotation=45)
plt.show()