**Insights**:

* El dataset se compone de 10 columnas de las cuales 9 corresponden a variables independientes, que usaremos para predecir el target.

* Classification es la variable a predecir. Todas las variables son numéricas, ya sea enteras o reales y no tiene valores nulos.

In [1]:
#Carga de las librerías
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Leer el dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00451/dataR2.csv'
data= pd.read_csv(url,sep=',')
data.head()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,1
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,1
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,1
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,1
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,1


In [5]:
y.value_counts()

Classification
2    64
1    52
Name: count, dtype: int64

In [4]:
#Separamos en X e y
X = data.drop(["Classification"], axis=1)
y = data["Classification"]

In [6]:
#Separamos en train y test!
(X_train, X_test,y_train, y_test) = train_test_split(X,y,stratify=y,test_size=0.30,random_state=42)

In [7]:
y_test.value_counts()

Classification
2    19
1    16
Name: count, dtype: int64

# **DecisionTree**

In [None]:
#Creamos un arbol de decisión sencillo y lo fiteamos
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, y_train)

In [None]:
y_test_pred = tree.predict(X_test) #Prediccion en Test

A lo largo de este notebook, se solicita calcular las métricas requeridas como así también su correspondiente interpretación:

In [None]:
#Calcular la métrica Accuracy.
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_test_pred)

0.7714285714285715

In [None]:
###Calcular la métrica precisión
from sklearn.metrics import precision_score
precision_score(y_test, y_test_pred)

0.7222222222222222

In [None]:
#Calcular la métrica Recall
from sklearn.metrics import recall_score
recall_score(y_test, y_test_pred)

0.8125

In [None]:
#Calcular la métrica F1 score
from sklearn.metrics import f1_score
f1_score(y_test, y_test_pred)

0.7647058823529411

In [None]:
#Calcular la Matriz de Confusión
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_test_pred)

array([[13,  3],
       [ 5, 14]])

In [None]:
y_test.value_counts()

Classification
2    19
1    16
Name: count, dtype: int64

In [None]:
from sklearn.metrics import classification_report
reporte=classification_report(y_test,y_test_pred)
print(reporte)

              precision    recall  f1-score   support

           1       0.72      0.81      0.76        16
           2       0.82      0.74      0.78        19

    accuracy                           0.77        35
   macro avg       0.77      0.77      0.77        35
weighted avg       0.78      0.77      0.77        35



# **Random Forest**

In [None]:
# Leer el dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00451/dataR2.csv'
data= pd.read_csv(url,sep=',')
data.head()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,1
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,1
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,1
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,1
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,1


In [None]:
#Separamos en X e y
X = data.drop(["Classification"], axis=1)
y = data["Classification"]

In [None]:
#Separamos en train y test!
(X_train, X_test,y_train, y_test) = train_test_split(X,y,stratify=y,test_size=0.30,random_state=42)

In [None]:
#Creamos un random forest!
model = RandomForestClassifier(random_state=42, n_estimators=100,
                               class_weight="balanced", max_features="log2")
model.fit(X_train, y_train)

In [None]:
#Prediccion en Test
y_test_pred = model.predict(X_test)

In [None]:
###Calcular la métrica Accuracy.
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_test_pred)

0.7714285714285715

In [None]:
###Calcular la métrica Precision
from sklearn.metrics import precision_score
precision_score(y_test, y_test_pred)

0.7

In [None]:
###Calcular la métrica Recall
from sklearn.metrics import recall_score
recall_score(y_test, y_test_pred)

0.875

In [None]:
##Calcular la métrica F1 score
from sklearn.metrics import f1_score
f1_score(y_test, y_test_pred)

0.7777777777777777

In [None]:
#Calcular la Matriz de Confusión
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_test_pred)

array([[14,  2],
       [ 6, 13]])

In [None]:
from sklearn.metrics import classification_report
reporte=classification_report(y_test,y_test_pred)
print(reporte)

              precision    recall  f1-score   support

           1       0.72      0.81      0.76        16
           2       0.82      0.74      0.78        19

    accuracy                           0.77        35
   macro avg       0.77      0.77      0.77        35
weighted avg       0.78      0.77      0.77        35

