In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

In [3]:
loan = pd.read_csv('data.csv')

In [4]:
loan.head()

Unnamed: 0,Id,Income,Age,Experience,Married/Single,House_Ownership,Car_Ownership,Profession,CITY,STATE,CURRENT_JOB_YRS,CURRENT_HOUSE_YRS,Risk_Flag
0,1,1303834,23,3,single,rented,no,Mechanical_engineer,Rewa,Madhya_Pradesh,3,13,0
1,2,7574516,40,10,single,rented,no,Software_Developer,Parbhani,Maharashtra,9,13,0
2,3,3991815,66,4,married,rented,no,Technical_writer,Alappuzha,Kerala,4,10,0
3,4,6256451,41,2,single,rented,yes,Software_Developer,Bhubaneswar,Odisha,2,12,1
4,5,5768871,47,11,single,rented,no,Civil_servant,Tiruchirappalli[10],Tamil_Nadu,3,14,1


In [5]:
X = loan.drop(columns=['Id','Profession','CITY','STATE','Risk_Flag'], axis=1)
Y = loan['Risk_Flag']

In [6]:
X['Married/Single'] = X['Married/Single'].replace({'single': 0, 'married': 1})
X['House_Ownership'] = X['House_Ownership'].apply(lambda x: 0 if x == "rented" else 1)
X['Car_Ownership'] = X['Car_Ownership'].replace({'no': 0, 'yes': 1})

In [7]:
X.head()

Unnamed: 0,Income,Age,Experience,Married/Single,House_Ownership,Car_Ownership,CURRENT_JOB_YRS,CURRENT_HOUSE_YRS
0,1303834,23,3,0,0,0,3,13
1,7574516,40,10,0,0,0,9,13
2,3991815,66,4,1,0,0,4,10
3,6256451,41,2,0,0,1,2,12
4,5768871,47,11,0,0,0,3,14


In [8]:
scaler = StandardScaler()
scaler.fit(X)

StandardScaler()

In [9]:
standarized_data = scaler.transform(X)

In [10]:
print(standarized_data)

[[-1.28314486e+00 -1.57960351e+00 -1.18023232e+00 ... -6.57128741e-01
  -9.14130833e-01  7.16355856e-01]
 [ 8.95457244e-01 -5.83343640e-01 -1.40667075e-02 ... -6.57128741e-01
   7.31036347e-01  7.16355856e-01]
 [-3.49268601e-01  9.40347921e-01 -1.01363724e+00 ... -6.57128741e-01
  -6.39936303e-01 -1.42798056e+00]
 ...
 [-1.64912549e-01 -2.31722510e-01 -5.13851972e-01 ... -6.57128741e-01
   1.82647287e-01  1.57705165e-03]
 [ 5.24618196e-01 -2.90326032e-01 -1.68001759e+00 ... -6.57128741e-01
  -1.73671442e+00 -1.42798056e+00]
 [ 1.41510816e+00  1.17476201e+00  1.15209891e+00 ... -6.57128741e-01
   1.82647287e-01 -7.13201753e-01]]


In [11]:
X = standarized_data
Y = loan['Risk_Flag']

In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [13]:
print(X.shape, X_train.shape, X_test.shape)

(252000, 8) (201600, 8) (50400, 8)


In [14]:
classifier = svm.SVC(kernel='linear')

In [15]:
classifier.fit(X_train, Y_train)

SVC(kernel='linear')

In [16]:
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediction, Y_train)
print("Akurasi data training adalah = ", training_data_accuracy)

Akurasi data training adalah =  0.876999007936508


In [17]:
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
print('Akurasi data testing adalah = ', test_data_accuracy)

Akurasi data testing adalah =  0.8770039682539682


In [18]:
input_data = (1303834,23,3,0,0,0,3,13)
input_data_as_numpy_array = np.array(input_data)
input_data_reshape = input_data_as_numpy_array.reshape(1,-1)
std_data = scaler.transform(input_data_reshape)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

if prediction[0] == 0:
    print("Loan is not approved")
else:
    print("Loan is approved")

[[-1.28314486 -1.57960351 -1.18023232 -0.33720003 -0.29442259 -0.65712874
  -0.91413083  0.71635586]]
[0]
Loan is not approved




In [19]:
import pickle

In [20]:
filename = 'loan.sav'
pickle.dump(classifier, open(filename, 'wb'))