In [9]:
import pandas as pd
from sqlalchemy import create_engine

# Extract CSV files into DataFrames

In [10]:
# Read the MLB salary CSV into a DataFrame and preview its first five rows
baseball_file = "Resources/mlbsalaries.csv"
baseball_df = pd.read_csv(filepath_or_buffer=baseball_file)
baseball_df.head(n=5)

Unnamed: 0,salary,name,total_value,pos,years,avg_annual,team
0,"$ 3,800,000",Darryl Strawberry,"$ 3,800,000",OF,1 (1991),"$ 3,800,000",LAD
1,"$ 3,750,000",Kevin Mitchell,"$ 3,750,000",OF,1 (1991),"$ 3,750,000",SF
2,"$ 3,750,000",Will Clark,"$ 3,750,000",1B,1 (1991),"$ 3,750,000",SF
3,"$ 3,625,000",Mark Davis,"$ 3,625,000",P,1 (1991),"$ 3,625,000",KC
4,"$ 3,600,000",Eric Davis,"$ 3,600,000",OF,1 (1991),"$ 3,600,000",CIN


In [11]:
# Read the NBA salary CSV into a DataFrame and preview its first five rows
basketball_file = "Resources/nbasalaries.csv"
basketball_df = pd.read_csv(filepath_or_buffer=basketball_file)
basketball_df.head(n=5)

Unnamed: 0,RK,NAME,TEAM,SALARY,Team Rank
0,1,LeBron James,CLE,30963450.0,1
1,2,Al Horford,BOS,26540100.0,1
2,3,Kevin Durant,GSW,26540100.0,1
3,4,James Harden,HOU,26540100.0,1
4,5,Mike Conley,MEM,26540100.0,1


In [12]:
# Read the NFL salary CSV into a DataFrame and preview its first five rows
football_file = "Resources/nflsalaries.csv"
football_df = pd.read_csv(filepath_or_buffer=football_file)
football_df.head(n=5)

Unnamed: 0,name,pos,cap_hit,cap_percent,season,team
0,Russell Wilson,QB,31000000,15.53,2020,seattle-seahawks
1,Aaron Rodgers,QB,29672000,15.06,2019,green-bay-packers
2,Kirk Cousins,QB,29000000,15.12,2019,minnesota-vikings
3,Jared Goff,QB,28842682,14.15,2020,los-angeles-rams
4,Khalil Mack,OLB,26600000,12.52,2020,chicago-bears


# Transform Baseball DataFrame

In [13]:
# Columns to keep from the raw baseball data, in their output order
baseball_cols = ["name", "salary", "years", "pos", "team"]

# Subset, relabel, and index by player name in one chain; every step returns
# a new frame, so baseball_df itself is left untouched
baseball_transformed = (
    baseball_df[baseball_cols]
    .copy()
    .rename(
        columns={
            "name": "Name",
            "salary": "Salary",
            # NOTE: "Year" carries the raw `years` text unchanged
            # (e.g. "1 (1991)"), so it is not a bare calendar year
            "years": "Year",
            "pos": "Position",
            "team": "Team",
        }
    )
    .set_index("Name")
)

# Preview the first five rows to verify the column names and index
baseball_transformed.head()

Unnamed: 0_level_0,Salary,Year,Position,Team
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Darryl Strawberry,"$ 3,800,000",1 (1991),OF,LAD
Kevin Mitchell,"$ 3,750,000",1 (1991),OF,SF
Will Clark,"$ 3,750,000",1 (1991),1B,SF
Mark Davis,"$ 3,625,000",1 (1991),P,KC
Eric Davis,"$ 3,600,000",1 (1991),OF,CIN


# Transform Basketball DataFrame

In [19]:
# Columns to keep from the raw basketball data, in their output order.
# The salary key includes trailing spaces (presumably the CSV header is padded
# that way); it has to match the raw column label exactly.
basketball_cols = ["NAME", "SALARY         ", "TEAM"]

# Subset, relabel, and index by player name in one chain; every step returns
# a new frame, so basketball_df itself is left untouched
basketball_transformed = (
    basketball_df[basketball_cols]
    .copy()
    .rename(
        columns={
            "NAME": "Name",
            "SALARY         ": "Salary",
            "TEAM": "Team",
        }
    )
    .set_index("Name")
)

# Render each salary as text like "$ 30,963,450": dollar sign, thousands
# separators, and no decimal places. na_action="ignore" keeps a missing salary
# missing instead of converting it into the literal string "$ nan".
basketball_transformed["Salary"] = basketball_transformed["Salary"].map(
    lambda amount: f"$ {amount:,.0f}", na_action="ignore"
)

# Preview the first five rows to verify the column names and index
basketball_transformed.head()

Unnamed: 0_level_0,Salary,Team
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
LeBron James,"$ 30,963,450",CLE
Al Horford,"$ 26,540,100",BOS
Kevin Durant,"$ 26,540,100",GSW
James Harden,"$ 26,540,100",HOU
Mike Conley,"$ 26,540,100",MEM


# Transform Football DataFrame

In [20]:
# Columns to keep from the raw football data, in their output order
football_cols = ["name", "cap_hit", "season", "pos", "team"]

# Subset, relabel, and index by player name in one chain; every step returns
# a new frame, so football_df itself is left untouched
football_transformed = (
    football_df[football_cols]
    .copy()
    .rename(
        columns={
            "name": "Name",
            "cap_hit": "Salary",
            "season": "Year",
            "pos": "Position",
            "team": "Team",
        }
    )
    .set_index("Name")
)

# Render each cap hit as text like "$ 31,000,000": dollar sign, thousands
# separators, and no decimal places
football_transformed["Salary"] = football_transformed["Salary"].apply(
    lambda amount: f"$ {amount:,.0f}"
)

# Preview the first five rows to verify the column names and index
football_transformed.head()


Unnamed: 0_level_0,Salary,Year,Position,Team
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Russell Wilson,"$ 31,000,000",2020,QB,seattle-seahawks
Aaron Rodgers,"$ 29,672,000",2019,QB,green-bay-packers
Kirk Cousins,"$ 29,000,000",2019,QB,minnesota-vikings
Jared Goff,"$ 28,842,682",2020,QB,los-angeles-rams
Khalil Mack,"$ 26,600,000",2020,OLB,chicago-bears
