In [20]:
# dependencies
import pandas as pd
# dashboard
import plotly.express       as px
from   jupyter_dash         import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from   dash.dependencies    import Input, Output, State

In [2]:
from utils import *

In [3]:
# Raw-GitHub URL of the processed phones CSV used as the notebook's data source
path = 'https://raw.githubusercontent.com/yoselalberto/ia_proyecto_final/main/data/processed/celulares_procesados.csv'

In [4]:
# Read the CSV located at `path` into the initial DataFrame
df_inicio = pd.read_csv(path)

In [5]:
# Global column configuration used throughout the notebook

# Columns ignored during modelling
columnas_ignorar = {'color', 'pantalla'}

# Target variable
columna_objetivo = 'producto_nombre'

# Categorical columns
columnas_categoricas = [
    'marca',
    'procesador',
    'sistema_operativo',
    'tecnologia',
]

# Numeric columns
columnas_numericas = [
    'peso',
    'camara_trasera',
    'camara_frontal',
    'ram',
    'memoria',
    'precio',
]

# Predictor variables: the numeric columns first, then the categorical ones
columnas_predictoras = [*columnas_numericas, *columnas_categoricas]

In [6]:
# Drop the ignored columns, remove duplicated rows, renumber the index, and
# finally keep the predictors followed by the target (in that column order).
# NOTE(review): duplicates are removed BEFORE the column subset is taken, so
# rows that differ only in a column not kept here would survive — confirm the
# CSV carries no such extra columns.
df = (
    df_inicio
    .drop(columns = columnas_ignorar)
    .drop_duplicates()
    .reset_index(drop = True)
    [columnas_predictoras + [columna_objetivo]]
)

In [9]:
# Separate the target from the predictors (no train/test split happens here)
# Double brackets keep the target as a one-column DataFrame rather than a Series
objetivo    = df[[columna_objetivo]]
predictores = df.drop(columns = [columna_objetivo])

## Ingeniería de variables

Para llenar los valores faltantes usaremos el promedio en las variables numéricas y la moda en las variables categóricas; también estandarizamos las variables numéricas: restamos la media y dividimos entre la desviación estándar.  
Para la variable objetivo, simplemente aplicamos one-hot encoding.

In [10]:
# Transform the predictors with the project helper `feature_engineer`
predictores_transformed = feature_engineer(
    predictores,
    columnas_numericas,
    columnas_categoricas,
)
# Transform the target too; the helper returns the transformed target together
# with a second object that is kept as `encoder_objetivo`
# (presumably the fitted encoder — TODO confirm against utils)
objetivo_transformed, encoder_objetivo = ohe_objetivo(objetivo)

## Modelado

Al final implementaremos un RandomForest. De la documentación oficial de scikit-learn:

<cite>A random forest is a meta estimator that fits a number of decision tree classifiers on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting.</cite>

In [11]:
# Scorer handed to the grid search: micro-averaged F1
scorer_f1 = make_scorer(f1_score, average = 'micro')

# Hyper-parameter values to explore
parameters = {
    'max_depth': [2, 4, 8],
    'criterion': ['gini', 'entropy'],
    'min_samples_leaf': [1, 2, 4],
}

# Grid search with 5-fold cross-validation.
# NOTE(review): the notebook prose announces a random forest, but the estimator
# tuned in this cell is a single DecisionTreeClassifier.
modelo = GridSearchCV(
    DecisionTreeClassifier(random_state = 59, max_features = 6, class_weight = "balanced"),
    parameters,
    n_jobs = 6,
    scoring = scorer_f1,
    cv = 5,
)

# Fit every candidate on the transformed data
modelo.fit(X = predictores_transformed, y = objetivo_transformed)

# Keep the best estimator found by the search
modelo_mejor = modelo.best_estimator_

# Quick look at the best cross-validated score and its parameters
print(modelo.best_score_, modelo.best_params_)

0.38014705882352945 {'criterion': 'entropy', 'max_depth': 8, 'min_samples_leaf': 1}


## Interacción con el usuario

El usuario introducirá las características deseadas.

In [10]:
# Keep the first row of `df` as its own one-row DataFrame
df_head = df.head(1)

Dash app running on http://127.0.0.1:8050/


In [11]:
def generate_table(dataframe, max_rows = 10):
    """Render the first rows of a DataFrame as a Dash ``html.Table``.

    Parameters
    ----------
    dataframe : object exposing ``columns``, ``iloc`` and ``len()``
        The data to display (a pandas DataFrame in this notebook).
    max_rows : int, default 10
        Upper bound on the number of body rows rendered.

    Returns
    -------
    html.Table
        A table whose ``Thead`` holds one ``Th`` per column name and whose
        ``Tbody`` holds one ``Tr`` per rendered row, with one ``Td`` per column.
    """
    column_names = dataframe.columns
    # Never render more rows than the frame actually has
    n_rows = min(len(dataframe), max_rows)

    # Header: a single row with the column names
    header_cells = []
    for name in column_names:
        header_cells.append(html.Th(name))

    # Body: one row per position, cells in column order
    body_rows = []
    for position in range(n_rows):
        cells = [html.Td(dataframe.iloc[position][name]) for name in column_names]
        body_rows.append(html.Tr(cells))

    return html.Table([
        html.Thead(html.Tr(header_cells)),
        html.Tbody(body_rows),
    ])

In [21]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = JupyterDash(__name__, external_stylesheets = external_stylesheets)


def _numeric_field(label, component_id, placeholder):
    """Build one labelled numeric input column for the second row of the form.

    Parameters
    ----------
    label : str
        Text rendered in the ``html.Label`` next to the input.
    component_id : str
        Dash component id of the ``dcc.Input``; callbacks refer to it.
    placeholder : str
        Hint shown inside the empty input (currency symbol or unit).

    Returns
    -------
    html.Div
        An inline-block container holding the label and the input.
    """
    return html.Div(children=[
        html.Label(label),
        # debounce = True: the value is only sent to the server on Enter / blur,
        # not on every keystroke
        dcc.Input(id = component_id, type = "number", debounce = True, placeholder = placeholder),
    ], style={'display': 'inline-block', 'vertical-align': 'top', 'margin-left': '3vw', 'margin-top': '3vw'})


# Page layout: title, short instruction, a row of categorical selectors,
# a row of numeric inputs, and the container the callback writes into
app.layout = html.Div([
    html.H1(children = 'Recomendación de un telefono inteligente'),
    html.Div(children='''
    A partir de las características elegidas te recomendaremos un telefono:
    '''),
    # first row: categorical features
    html.Div(children=[
        # first column: brand
        html.Div(children=[
            html.Label('Marca'),
            dcc.Dropdown(id = 'marca', options=[
                {'label': 'Motorola', 'value': 'motorola'},
                {'label': 'Samsung',  'value': 'samsung'},
                {'label': 'Apple',    'value': 'apple'},
                {'label': 'Xiaomi',   'value': 'xiaomi'},
                {'label': 'Huawei',   'value': 'huawei'},
                {'label': 'Nokia',    'value': 'nokia'},
                {'label': 'TCL',      'value': 'tcl'}
            ], value = ''),
        ], style={'display': 'inline-block', 'vertical-align': 'top', 'margin-left': '3vw', 'margin-top': '3vw', "width": "7vw"}),
        # second column: operating system
        html.Div(children=[
            html.Label('Sistema Operativo'),
            dcc.RadioItems(id = 'sistema_operativo',
                options=[
                    {'label': 'Android', 'value': 'android'},
                    {'label': 'IOS',     'value': 'ios'}
                ], value = ''),
        ], style={'display': 'inline-block', 'vertical-align': 'top', 'margin-left': '3vw', 'margin-top': '3vw',
                  "width": "10vw"}),
        # third column: network technology
        html.Div(children=[
            html.Label('Red'),
            dcc.RadioItems(id = 'tecnologia', options=[
                {'label': '4G',    'value': '4g'},
                {'label': '4GLte', 'value': '4glte'},
                {'label': '5G',    'value': '5g'}], value = ''
            ),
        ], style={'display': 'inline-block', 'vertical-align': 'top', 'margin-left': '3vw', 'margin-right': '3vw',
                  'margin-top': '3vw', "width": "7vw"}),
        # fourth column: processor
        html.Div(children=[
            html.Label('Procesador'),
            dcc.Dropdown(id = 'procesador',
                options=[
                    {'label': 'Qualcomm', 'value': 'qualcomm'},
                    {'label': 'Samsung',  'value': 'samsung'},
                    {'label': 'Apple',    'value': 'apple'},
                    {'label': 'Mediatek', 'value': 'mediatek'},
                    {'label': 'ARM',      'value': 'arm'}
                ], value = ''),
        # 'margin-right' was previously misspelled 'margin-rigth', so the
        # browser ignored the intended right margin of this column
        ], style={'display': 'inline-block', 'vertical-align': 'top', 'margin-right': '3vw', 'margin-top': '3vw', "width": "7vw"}),
    ], className='row'),

    # second row: numeric features, one identically styled column per input
    html.Div(children=[
        _numeric_field('Precio',           'precio_pesos',   '$'),
        _numeric_field('Ram',              'ram',            'Gb'),
        _numeric_field('Memoria',          'memoria',        'Gb'),
        _numeric_field('Camara trasera',   'camara_trasera', 'Mp'),
        _numeric_field('Camara delantera', 'camara_frontal', 'Mp'),
        _numeric_field('Peso',             'peso',           'gramos'),
    ], className='row'),

    # container that receives the callback output
    html.Div(id = 'recomendation')

])
# interacción
@app.callback(
    Output('recomendation', 'children'),
    # numeric inputs
    Input('precio_pesos', 'value'),
    Input('ram', 'value'),
    Input('memoria', 'value'),
    Input('camara_trasera', 'value'),
    Input('camara_frontal', 'value'),
    Input('peso', 'value'),
    # categorical inputs
    Input('marca', 'value'),
    Input('procesador', 'value'),
    Input('sistema_operativo', 'value'),
    Input('tecnologia', 'value'),
)
def make_recommendation(precio_pesos, ram, memoria, camara_trasera, camara_frontal, peso,
                        marca, procesador, sistema_operativo, tecnologia):
    """Callback that fills the 'recomendation' container.

    The arguments are positional: they arrive in the same order as the
    ``Input`` declarations of the decorator.

    This is currently a placeholder: only ``precio_pesos`` is used, and it is
    echoed back as text; every other argument is ignored.

    Returns
    -------
    str
        ``'Output: '`` followed by the current value of the price input.
    """
    # Pending steps noted by the author:
    #   1. gather the inputs in a single dataframe
    #   2. fill missing values (complete_df — presumably a helper; not visible here)
    #   3. predict (predict_phone — presumably a helper; not visible here)
    mensaje = 'Output: {}'.format(precio_pesos)
    return mensaje
#
# Start the Dash server; with mode = 'external' the cell output is the URL
# where the app is being served (see the output below)
app.run_server(mode = 'external')

Dash app running on http://127.0.0.1:8050/


In [31]:
# Exploratory lookup: print the documentation of JupyterDash.callback
help(JupyterDash.callback)

Help on function callback in module dash.dash:

callback(self, *_args, **_kwargs)
    Normally used as a decorator, `@app.callback` provides a server-side
    callback relating the values of one or more `Output` items to one or
    more `Input` items which will trigger the callback when they change,
    and optionally `State` items which provide additional information but
    do not trigger the callback directly.
    
    The last, optional argument `prevent_initial_call` causes the callback
    not to fire when its outputs are first added to the page. Defaults to
    `False` unless `prevent_initial_callbacks=True` at the app level.



In [35]:
# Peek at the first row of the cleaned data
df.head(1)

Unnamed: 0,peso,camara_trasera,camara_frontal,ram,memoria,precio,marca,procesador,sistema_operativo,tecnologia,producto_nombre
0,0.282,12,10,12,256,46799,samsung,qualcomm,android,5g,galaxy z fold2
