In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [3]:
data = pd.read_csv("/content/drive/MyDrive/Wise ML/Mall_Customers.csv")
data.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [4]:
def convert(i):
  i = i.lower()
  if i=="male":
    return 0
  elif i=="female":
    return 1
  else:
    return -1

In [5]:
data = pd.read_csv("/content/drive/MyDrive/Wise ML/Mall_Customers.csv",converters={"Genre":convert})

In [6]:
data.shape

(200, 5)

In [7]:
data.head()

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,0,19,15,39
1,2,0,21,15,81
2,3,1,20,16,6
3,4,1,23,16,77
4,5,1,31,17,40


In [8]:
data.columns

Index(['CustomerID', 'Genre', 'Age', 'Annual Income (k$)',
       'Spending Score (1-100)'],
      dtype='object')

In [9]:
labels = data["Spending Score (1-100)"]
features = data[['CustomerID',	'Genre',	'Age',	'Annual Income (k$)']]

**Splitting data into train and test sets:**

In [10]:
 X_train, X_test, y_train, y_test = train_test_split(features,labels, test_size=0.33, random_state=42)

In [11]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((134, 4), (66, 4), (134,), (66,))

**Applying random forest:**

In [12]:
clf = RandomForestClassifier(max_depth=2)

In [13]:
clf.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=2, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [14]:
y_pred = clf.predict(X_test)

In [15]:
y_pred

array([55, 73, 14, 75, 43, 43, 42, 75, 43, 42, 42, 75, 75, 55, 43, 75, 46,
       75, 46, 55, 75, 42, 75, 42, 55, 55, 55, 55, 75,  5, 77, 14, 42, 55,
       75, 46, 75, 75, 43, 42, 46, 18, 42, 14, 75, 16, 73, 73, 75, 46, 55,
       75,  6, 75, 43, 46, 43, 46, 75, 42, 75, 42, 73, 14, 73, 43])

In [16]:
data['Spending Score (1-100)'].value_counts()

42    8
55    7
46    6
73    6
35    5
     ..
31    1
82    1
24    1
23    1
99    1
Name: Spending Score (1-100), Length: 84, dtype: int64

In [17]:
from sklearn.metrics import f1_score, classification_report
f1_score(y_test, y_pred,average = 'weighted')

0.016313511050353155

In [18]:
f1_score(y_test, y_pred,average = 'micro')

0.045454545454545456

In [19]:
f1_score(y_test, y_pred,average = 'macro')

0.015819548872180452

In [20]:
f1_score(y_test, y_pred,average = None)

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.4       , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.28571429,
       0.        , 0.10526316, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])

In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         1
           5       0.00      0.00      0.00         1
           6       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         1
          13       0.00      0.00      0.00         2
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         2
          17       0.00      0.00      0.00         1
          20       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         1
          26       0.00      0.00      0.00         2
          29       0.00      0.00      0.00         2
          34       0.00      0.00      0.00         1
          35       0.00      0.00      0.00         2
          40       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
