# Primer modelo automático usando TPOT

## Importamos las librerías

In [None]:
import numpy as np
import pandas as pd
from tpot import TPOTClassifier

from sklearn.model_selection import train_test_split

## Importamos los datos con pandas

In [None]:
# Load the Titanic training data and hold out 30% of the rows for testing.
datos_titanic = pd.read_csv('./titanic_train.csv')
# train_test_split hands back row selections of datos_titanic; copy them so the
# in-place column edits made later in the notebook operate on independent
# DataFrames and do not trigger pandas' SettingWithCopyWarning.
entrenamiento, pruebas = (
    particion.copy()
    for particion in train_test_split(datos_titanic, test_size=0.3)
)


In [None]:
# Show descriptive statistics (count, mean, std, quartiles, ...) of the training split.
entrenamiento.describe()

In [None]:
# Preview the first rows of the training split.
entrenamiento.head()

## Hacemos una "limpieza" de nuestros datos antes de hacer el modelo

In [None]:
# Keep both splits in one list so the age-imputation loop below can process
# each of them with the same code.
combine = [entrenamiento, pruebas]

In [None]:
# Encode the 'Sex' column numerically in both splits: 'male' -> 0, 'female' -> 1.
sex_mapping = dict(male=0, female=1)
for particion in (entrenamiento, pruebas):
    particion['Sex'] = particion['Sex'].map(sex_mapping)

In [None]:
# Table of imputed ages, indexed as [sex code, passenger class - 1]; it starts
# zero-filled and is populated by the imputation loop.
calculo_edades = np.zeros(shape=(2, 3))

In [None]:
# Impute missing ages: within each split, every passenger whose 'Age' is
# missing receives the median age of the passengers with the same sex code
# (0 or 1) and passenger class (1-3), rounded to the nearest 0.5.
for dataset in combine:
    # Fallback for a (sex, class) group with no known age at all: its median
    # is NaN, and int(NaN) would raise ValueError.
    edad_respaldo = dataset['Age'].median()
    for sex in range(0, 2):
        for pclass in range(0, 3):
            en_grupo = (dataset['Sex'] == sex) & (dataset['Pclass'] == pclass + 1)
            guess_df = dataset.loc[en_grupo, 'Age'].dropna()
            age_guess = guess_df.median()
            if np.isnan(age_guess):
                age_guess = edad_respaldo
            if np.isnan(age_guess):
                # No age is known anywhere in this split: nothing to impute with.
                calculo_edades[sex, pclass] = np.nan
                continue
            # Round the median to the nearest multiple of 0.5.
            calculo_edades[sex, pclass] = int(age_guess / 0.5 + 0.5) * 0.5
            # Filling a group only touches rows whose age was missing, so it
            # cannot change the medians of the groups processed afterwards.
            dataset.loc[en_grupo & dataset['Age'].isnull(), 'Age'] = calculo_edades[sex, pclass]

In [None]:
# Columns that are not used as model features in either split.
columnas_sin_uso = ['Ticket', 'Cabin', 'Name', 'SibSp', 'Parch', 'Embarked']

# The training split also loses 'PassengerId'; the test split keeps it here
# and it is only removed when the feature matrix is built.
entrenamiento = entrenamiento.drop(columnas_sin_uso + ['PassengerId'], axis='columns')
pruebas = pruebas.drop(columnas_sin_uso, axis='columns')

# Target vector and feature matrices.
Y_train = entrenamiento['Survived']
X_train = entrenamiento.drop('Survived', axis='columns')
X_test = pruebas.drop(['PassengerId', 'Survived'], axis='columns')

## Creamos y entrenamos nuestro modelo

In [None]:
# Configure TPOT's automated pipeline search: verbosity level 2 for progress
# output, and the search time limited to 2 minutes.
tpot = TPOTClassifier(verbosity=2, max_time_mins=2)

In [None]:
# Run the pipeline search on the training features and labels.
tpot.fit(X_train, Y_train)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.decomposition import PCA

In [None]:
from sklearn.pipeline import make_pipeline

# Rebuild the pipeline reported by TPOT. TPOT prints it in nested form,
# GradientBoostingClassifier(PCA(PCA(input, ...), ...), ...), which means
# "apply PCA, apply PCA again, then classify". Passing the PCA objects as
# constructor arguments is not valid; the steps have to be chained in a
# scikit-learn Pipeline instead.
nuevo_modelo = make_pipeline(
    PCA(iterated_power=10, svd_solver='randomized'),
    PCA(iterated_power=6, svd_solver='randomized'),
    GradientBoostingClassifier(
        learning_rate=0.1,
        max_depth=5,
        max_features=0.8500000000000001,
        min_samples_leaf=16,
        min_samples_split=9,
        n_estimators=100,
        subsample=0.5,
    ),
)

In [None]:
# Train the hand-built model on the training features and labels.
nuevo_modelo.fit(X_train, Y_train)

## Predecimos con nuestro modelo y calculamos la tasa de exactitud

In [None]:
# Predict the target for the held-out test features.
Y_pred = nuevo_modelo.predict(X_test)

In [None]:
# Display the predictions.
Y_pred

In [None]:
# Mean accuracy of the fitted model on the training data. The original call
# used `decision_tree`, a name that is not defined in this notebook; the
# estimator fitted above is `nuevo_modelo`.
nuevo_modelo.score(X_train, Y_train)

In [None]:
# Write the Python code of the pipeline found by TPOT to the file "algo.py".
tpot.export("algo.py")