In [1]:
import pandas as pd

In [2]:
# Read the raw shopping records from disk and preview the first rows.
file_path = 'resources/shopping_data.csv'
shopping_df = pd.read_csv(filepath_or_buffer=file_path)
shopping_df.head()

Unnamed: 0,CustomerID,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,Yes,19.0,15000,39.0
1,2,Yes,21.0,15000,81.0
2,3,No,20.0,16000,6.0
3,4,No,23.0,16000,77.0
4,5,No,31.0,17000,40.0


In [5]:
# Show the dtype of each column in the loaded frame.
shopping_df.dtypes

CustomerID                  int64
Card Member                object
Age                       float64
Annual Income               int64
Spending Score (1-100)    float64
dtype: object

In [7]:
# Report how many missing entries each column contains.
# The per-column counts are computed once for the whole frame, then printed.
null_counts = shopping_df.isnull().sum()
for col in shopping_df.columns:
    print(f"Column {col} has {null_counts[col]} null values")

Column CustomerID has 0 null values
Column Card Member has 2 null values
Column Age has 2 null values
Column Annual Income has 0 null values
Column Spending Score (1-100) has 1 null values


In [9]:
# Discard every row that has at least one missing value,
# then print the per-column null counts again to confirm none remain.
shopping_df = shopping_df.dropna()
null_counts = shopping_df.isnull().sum()
for col in shopping_df.columns:
    print(f"Column {col} has {null_counts[col]} null values")

Column CustomerID has 0 null values
Column Card Member has 0 null values
Column Age has 0 null values
Column Annual Income has 0 null values
Column Spending Score (1-100) has 0 null values


In [10]:
# Remove the CustomerID column from the working frame.
# errors='ignore' keeps this cell re-runnable: on a second execution the
# column is already gone, and without it drop() would raise a KeyError.
shopping_df = shopping_df.drop(columns=['CustomerID'], errors='ignore')
shopping_df.head()

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,Yes,19.0,15000,39.0
1,Yes,21.0,15000,81.0
2,No,20.0,16000,6.0
3,No,23.0,16000,77.0
4,No,31.0,17000,40.0


In [11]:
# Changing data into a type that can be processed

def change_to_num(val):
    """Encode a membership flag as an integer.

    Returns 1 when ``val`` is exactly the string 'Yes' and 0 for every
    other value (the comparison is case-sensitive).
    """
    return 1 if val == 'Yes' else 0

# Replace the Card Member column with its integer encoding.
# Note: re-running this cell on already-encoded values maps them all to 0,
# because change_to_num only recognises the string 'Yes'.
card_member_encoded = shopping_df['Card Member'].apply(change_to_num)
shopping_df['Card Member'] = card_member_encoded
shopping_df.head()

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15000,39.0
1,1,21.0,15000,81.0
2,0,20.0,16000,6.0
3,0,23.0,16000,77.0
4,0,31.0,17000,40.0


In [12]:
# Rescale Annual Income by dividing by 1000 (e.g. 15000 -> 15.0).
# Note: this cell is not idempotent; each re-run divides the column again.
shopping_df['Annual Income'] = shopping_df['Annual Income'].div(1000)
shopping_df.head()

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15.0,39.0
1,1,21.0,15.0,81.0
2,0,20.0,16.0,6.0
3,0,23.0,16.0,77.0
4,0,31.0,17.0,40.0


In [15]:
# Rename the columns to short snake_case labels.
# rename() returns a new frame, which is rebound to shopping_df instead of
# mutating the existing object with inplace=True. Labels missing from the
# frame are ignored by default, so the cell can be safely re-run.
column_renames = {
    "Card Member": "card_member",
    "Age": "age",
    "Annual Income": "annual_income",
    "Spending Score (1-100)": "spending_score",
}
shopping_df = shopping_df.rename(columns=column_renames)
shopping_df.head()

Unnamed: 0,card_member,age,annual_income,spending_score
0,1,19.0,15.0,39.0
1,1,21.0,15.0,81.0
2,0,20.0,16.0,6.0
3,0,23.0,16.0,77.0
4,0,31.0,17.0,40.0


In [16]:
# Write the processed table to a new CSV file.
# index=False leaves the row index out of the written file.
file_path = "resources/new_shopping_data.csv"
shopping_df.to_csv(path_or_buf=file_path, index=False)