In [1]:
# Import the required modules
import pandas as pd
from pathlib import Path
import hvplot.pandas

## Load the Credit Card Data into a Pandas DataFrame

In [2]:
# Read in the CSV file as a Pandas Dataframe
ccinfo_df = pd.read_csv(
    Path("../Resources/cc_info_default.csv")
)

In [3]:
# Review the DataFrame
ccinfo_df.head()


Unnamed: 0,limit_bal,education,marriage,age,bill_amt,pay_amt,default
0,20000,secondary,yes,24,7704,689,1
1,120000,secondary,no,26,17077,5000,1
2,90000,secondary,no,34,101653,11018,0
3,50000,secondary,yes,37,231334,8388,0
4,50000,secondary,yes,57,109339,59049,0


In [4]:
# Review the info
ccinfo_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4999 entries, 0 to 4998
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   limit_bal  4999 non-null   int64 
 1   education  4999 non-null   object
 2   marriage   4999 non-null   object
 3   age        4999 non-null   int64 
 4   bill_amt   4999 non-null   int64 
 5   pay_amt    4999 non-null   int64 
 6   default    4999 non-null   int64 
dtypes: int64(5), object(2)
memory usage: 273.5+ KB


## Transform "education" column with get_dummies

In [5]:
# Verify the categories of the "education" column
ccinfo_df["education"].unique()




array(['secondary', 'primary', 'post-grad', 'other'], dtype=object)

In [7]:
# Transform the education column using get_dummies
card_dummies = pd.get_dummies(ccinfo_df, columns=["education"])

# Display the transformed data
card_dummies.head()


Unnamed: 0,limit_bal,marriage,age,bill_amt,pay_amt,default,education_other,education_post-grad,education_primary,education_secondary
0,20000,yes,24,7704,689,1,False,False,False,True
1,120000,no,26,17077,5000,1,False,False,False,True
2,90000,no,34,101653,11018,0,False,False,False,True
3,50000,yes,37,231334,8388,0,False,False,False,True
4,50000,yes,57,109339,59049,0,False,False,False,True


In [8]:
# Concatenate the df_shopping_transformed and the card_dummies DataFrames
ccinfo_df = pd.concat([ccinfo_df, card_dummies], axis="columns")

# Drop the original education column
ccinfo_df.drop(columns=["education"], inplace=True)

# Display the DataFrame
ccinfo_df.head()


Unnamed: 0,limit_bal,marriage,age,bill_amt,pay_amt,default,limit_bal.1,marriage.1,age.1,bill_amt.1,pay_amt.1,default.1,education_other,education_post-grad,education_primary,education_secondary
0,20000,yes,24,7704,689,1,20000,yes,24,7704,689,1,False,False,False,True
1,120000,no,26,17077,5000,1,120000,no,26,17077,5000,1,False,False,False,True
2,90000,no,34,101653,11018,0,90000,no,34,101653,11018,0,False,False,False,True
3,50000,yes,37,231334,8388,0,50000,yes,37,231334,8388,0,False,False,False,True
4,50000,yes,57,109339,59049,0,50000,yes,57,109339,59049,0,False,False,False,True


## Transform "marriage" column with encoding function

In [11]:
# Encoding the marriage column using a custom function
def encode_marriage(marriage):
    if marriage == "yes":
        return 1
    else 0
     

# Call the encode_marriage function on the marriage column
ccinfo_df["marriage"] = ccinfo_df["marriage"].apply(encode_marriage)


# Review the DataFrame 
ccinfo_df.head()



SyntaxError: expected ':' (270752167.py, line 5)

## Apply the Standard Scaler to "limit_bal", "bill_amt", "pay_amt"

In [9]:
# Import the module
module =


In [10]:
# Scaling the numeric columns


# Review the scaled data



array([[-1.1173411 , -0.66070266, -0.5427793 ],
       [-0.3499424 , -0.63637003, -0.46399421],
       [-0.58016201, -0.41680786, -0.35401308],
       ...,
       [ 0.26397655,  1.1152494 , -0.16349243],
       [ 1.10811512,  3.33813208,  0.76045505],
       [-0.04298292, -0.66917611, -0.4872953 ]])

In [11]:
# Create a DataFrame of the scaled data


# Replace the original data with the columns of information from the scaled Data


# Review the DataFrame



Unnamed: 0,limit_bal,marriage,age,bill_amt,pay_amt,default,other,post-grad,primary,secondary
0,-1.117341,1,24,-0.660703,-0.542779,1,0,0,0,1
1,-0.349942,0,26,-0.63637,-0.463994,1,0,0,0,1
2,-0.580162,0,34,-0.416808,-0.354013,0,0,0,0,1
3,-0.887121,1,37,-0.080152,-0.402077,0,0,0,0,1
4,-0.887121,1,57,-0.396855,0.523771,0,0,0,0,1


## Elbow Method to find k

In [12]:
# Import the KMeans module from SKLearn
from sklearn.cluster import KMeans

In [13]:
# Create a a list to store inertia values and the values of k



In [14]:
# Create a for-loop where each value of k is evaluated using the K-means algorithm
# Fit the model using the service_ratings DataFrame
# Append the value of the computed inertia from the `inertia_` attribute of the KMeans model instance


    

In [15]:
# Define a DataFrame to hold the values for k and the corresponding inertia


# Review the DataFrame



Unnamed: 0,k,inertia
0,1,449413.376075
1,2,152036.470987
2,3,83362.744848
3,4,58548.383261
4,5,45451.282971


In [16]:
# Plot the DataFrame



## Kmeans algo to cluster data

In [17]:
# Define the model with 3 clusters


# Fit the model


# Make predictions


# Create a copy of the preprocessed data


# Add a class column with the labels


In [18]:
# Plot the clusters
