 ## Deploy Web API with Flask

### Tutorial
https://minerandodados.com.br/realizando-o-deploy-de-um-modelo-de-machine-learning-em-producao/

### YouTube Tutorial
https://www.youtube.com/watch?v=_dRfScGH7NA&t=704s

In [2]:
import os
import re
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

### Carregando a Base de Dados

In [3]:
# Read the loan dataset from the CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer='loan.csv')

In [4]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [5]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
data.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [6]:
# Column dtypes and non-null counts; columns with fewer than 614 non-null
# entries contain missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            614 non-null    object 
 1   Gender             601 non-null    object 
 2   Married            611 non-null    object 
 3   Dependents         599 non-null    object 
 4   Education          614 non-null    object 
 5   Self_Employed      582 non-null    object 
 6   ApplicantIncome    614 non-null    int64  
 7   CoapplicantIncome  614 non-null    float64
 8   LoanAmount         592 non-null    float64
 9   Loan_Amount_Term   600 non-null    float64
 10  Credit_History     564 non-null    float64
 11  Property_Area      614 non-null    object 
 12  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 62.5+ KB


In [7]:
# Class balance of the target column.
data['Loan_Status'].value_counts()

Y    422
N    192
Name: Loan_Status, dtype: int64

In [8]:
# Frequency of each Married category (missing entries are not counted).
data['Married'].value_counts()

Yes    398
No     213
Name: Married, dtype: int64

In [9]:
# Frequency of each Education category.
data['Education'].value_counts()

Graduate        480
Not Graduate    134
Name: Education, dtype: int64

In [10]:
# Down-sample the Loan_Status == 'Y' rows to 200 so they are roughly balanced
# against the 192 'N' rows. A fixed random_state makes the draw repeatable
# across "Restart Kernel -> Run All"; without it every run trains on a
# different subset.
data2 = data[data.Loan_Status == 'Y'].sample(200, random_state=42)

In [12]:
# Combine the 200 sampled 'Y' rows with the 'N' rows into the working frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so use
# pd.concat; like append, it keeps the original index labels.
# sample(192, ...) draws without replacement, so with 192 'N' rows available it
# returns all of them in shuffled order; random_state fixes that order.
data = pd.concat([data2, data[data.Loan_Status == 'N'].sample(192, random_state=42)])

In [13]:
# Confirm the class balance of the target after re-sampling.
data['Loan_Status'].value_counts()

Y    200
N    192
Name: Loan_Status, dtype: int64

### Checando Missing Values

In [14]:
# Display the sampled Loan_Status == 'Y' rows (pandas elides the middle rows
# of a long frame in the rendered output).
data2

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
113,LP001392,Female,No,1,Graduate,Yes,7451,0.0,,360.0,1.0,Semiurban,Y
405,LP002305,Female,No,0,Graduate,No,4547,0.0,115.0,360.0,1.0,Semiurban,Y
604,LP002959,Female,Yes,1,Graduate,No,12000,0.0,496.0,360.0,1.0,Semiurban,Y
194,LP001664,Male,No,0,Graduate,No,4191,0.0,120.0,360.0,1.0,Rural,Y
368,LP002190,Male,Yes,1,Graduate,No,6325,0.0,175.0,360.0,1.0,Semiurban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14,LP001030,Male,Yes,2,Graduate,No,1299,1086.0,17.0,120.0,1.0,Urban,Y
86,LP001280,Male,Yes,2,Not Graduate,No,3333,2000.0,99.0,360.0,,Semiurban,Y
99,LP001343,Male,Yes,0,Graduate,No,1759,3541.0,131.0,360.0,1.0,Semiurban,Y
253,LP001843,Male,Yes,1,Not Graduate,No,2661,7101.0,279.0,180.0,1.0,Semiurban,Y


In [16]:
# Preview the first five rows of the sampled 'Y' subset.
data2.head(n=5)

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
113,LP001392,Female,No,1,Graduate,Yes,7451,0.0,,360.0,1.0,Semiurban,Y
405,LP002305,Female,No,0,Graduate,No,4547,0.0,115.0,360.0,1.0,Semiurban,Y
604,LP002959,Female,Yes,1,Graduate,No,12000,0.0,496.0,360.0,1.0,Semiurban,Y
194,LP001664,Male,No,0,Graduate,No,4191,0.0,120.0,360.0,1.0,Rural,Y
368,LP002190,Male,Yes,1,Graduate,No,6325,0.0,175.0,360.0,1.0,Semiurban,Y


In [17]:
# Summary statistics for the numeric columns of the sampled 'Y' subset.
data2.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,200.0,200.0,197.0,196.0,181.0
mean,5008.605,1462.459,138.827411,341.142857,0.983425
std,3875.995964,1693.066108,63.340597,57.131427,0.128025
min,210.0,0.0,17.0,84.0,0.0
25%,2977.75,0.0,101.0,360.0,1.0
50%,3841.5,1349.0,128.0,360.0,1.0
75%,5789.75,2281.75,160.0,360.0,1.0
max,37719.0,8980.0,496.0,480.0,1.0


In [18]:
# Summary statistics for the numeric columns of the re-sampled working frame.
data.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,392.0,392.0,378.0,382.0,360.0
mean,5222.877551,1665.894898,144.761905,342.565445,0.763889
std,5514.712151,3300.137251,75.128229,63.249493,0.425282
min,150.0,0.0,9.0,36.0,0.0
25%,2942.5,0.0,100.0,360.0,1.0
50%,3841.5,1239.5,128.0,360.0,1.0
75%,5816.0,2281.75,168.0,360.0,1.0
max,81000.0,41667.0,570.0,480.0,1.0


In [19]:
# Number of missing values per column in the working frame.
data.isna().sum()

Loan_ID               0
Gender                8
Married               3
Dependents           13
Education             0
Self_Employed        22
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           14
Loan_Amount_Term     10
Credit_History       32
Property_Area         0
Loan_Status           0
dtype: int64

In [20]:
# Number of missing values per column in the sampled 'Y' subset.
data2.isna().sum()

Loan_ID               0
Gender                3
Married               3
Dependents            7
Education             0
Self_Employed        13
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            3
Loan_Amount_Term      4
Credit_History       19
Property_Area         0
Loan_Status           0
dtype: int64

#### Preenchendo Missing Values
- Dependents: Assumindo o Valor Majoritário da Coluna
- Self_Employed: Assumindo o Valor Majoritário da Coluna
- LoanAmount: Preenchendo com o Valor Médio da Coluna
- Loan_Amount_Term: Preenchendo com o Valor Médio da Coluna
- Credit_History: Assumindo o Valor Majoritário da Coluna
- Married: Assumindo o Valor Majoritário da Coluna
- Gender: Assumindo o Valor Majoritário da Coluna

In [21]:
# Impute missing Gender entries with the hard-coded category 'Male'.
data = data.fillna({'Gender': 'Male'})

In [22]:
# Impute missing Married entries with the most frequent category present in
# the column instead of a hard-coded value. The value_counts() on the full
# dataset shows 'Yes' (398) outnumbering 'No' (213), so the previous
# hard-coded 'No' filled in the minority class, contradicting the stated
# "majority value" strategy. mode() ignores NaN and returns the most common
# value(s) in sorted order; [0] takes the first.
data['Married'] = data['Married'].fillna(data['Married'].mode()[0])

In [23]:
# Impute missing Dependents entries with the hard-coded category '0'
# (kept as a string, matching the column's object dtype).
data = data.fillna({'Dependents': '0'})

In [24]:
# Impute missing Self_Employed entries with the hard-coded category 'No'.
data = data.fillna({'Self_Employed': 'No'})

In [25]:
# Impute missing LoanAmount values with the column mean
# (mean() skips NaN, so it is computed from the observed values only).
data = data.fillna({'LoanAmount': data['LoanAmount'].mean()})

In [26]:
# Impute missing Credit_History values with the hard-coded value 1.0.
data = data.fillna({'Credit_History': 1.0})

In [27]:
# Impute missing Loan_Amount_Term values with the column mean
# (mean() skips NaN, so it is computed from the observed values only).
data = data.fillna({'Loan_Amount_Term': data['Loan_Amount_Term'].mean()})

In [28]:
# Distribution of Credit_History after imputation.
data['Credit_History'].value_counts()

1.0    307
0.0     85
Name: Credit_History, dtype: int64

### Checando Novamente Missing Values

In [29]:
# Re-count missing values per column; every count should now be zero.
data.isna().sum()

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

### Transformando Dados Categóricos
- Várias colunas do DataFrame são categóricas e precisam ser transformadas. São elas:
    - Gender
    - Married
    - Education
    - Self_Employed
    - Property_Area

In [30]:
from sklearn.preprocessing import LabelEncoder

In [None]:
gender_values = {'Female': 0, 'Male': 1}
married_values = {'No': 0, 'Yes': 1}
education_values =
employed_values = 
dependent_values =
loan_values = {}
data.replace({'Gender': gender_values,