# Shapash: interpretable machine learning in Python

- Criar um ambiente com python 3.10 <br>
conda create -n py310 python=3.10 <br>

- Ativar o ambiente com python 3.10 <br>
conda activate py310 <br>


- No Terminal de Comando: <br>
conda activate py310 <br>
pip install -q shapash        <br>
pip install -q "shapash[report]"  <br>

- instalar a IDE Jupyter notebook <br>
pip install -q jupyter <br>
python --version <br>
jupyter notebook

In [None]:
!python --version

In [None]:
%%time

!pip install -q shapash

# Carregamento dos dados

In [None]:
import warnings
# Ignore every warning raised from here on (no category or module filter is given),
# presumably to keep the notebook output uncluttered.
warnings.filterwarnings('ignore')

In [None]:
%%time
!pip install -q XGBoost

In [None]:
!pip install -q lightgbm

In [None]:
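# macOS only: if importing lightgbm fails because the OpenMP runtime is missing,
# run this in a terminal: brew install libomp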

In [None]:
from lightgbm import LGBMRegressor

In [None]:
from category_encoders import OrdinalEncoder
import numpy as np 
import pandas as pd 
import xgboost as xg 
from category_encoders import OrdinalEncoder
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error as MSE 
from sklearn.ensemble import ExtraTreesRegressor

# Building a Supervised Model

In [None]:
from shapash.data.data_loader import data_loading
# Load the 'house_prices' example dataset through Shapash's data loader.
# house_df: the data, including the 'SalePrice' column used as target below.
# house_dict: passed later to SmartExplainer as `features_dict` (labels for feature names).
house_df, house_dict = data_loading('house_prices')

In [None]:
# Target: the sale price, kept as a single-column DataFrame.
y_df = house_df[['SalePrice']]
# Features: every other column, in the order returned by Index.difference.
X_df = house_df.loc[:, house_df.columns.difference(['SalePrice'])]

In [None]:
# Display the dimensions of the loaded dataset.
house_df.shape

In [None]:
# Preview the first rows of the loaded dataset.
house_df.head()

# Encoding Categorical Features

In [None]:
from category_encoders import OrdinalEncoder

# Columns stored with the 'object' dtype are the ones treated as categorical.
categorical_features = [
    name for name, dtype in X_df.dtypes.items() if dtype == 'object'
]

# Configure the ordinal encoder for those columns only, then fit it on the features.
encoder = OrdinalEncoder(
    cols=categorical_features,
    handle_unknown='ignore',
    return_df=True,
)
encoder.fit(X_df)

# Replace the feature table with its encoded version; the fitted encoder is
# reused later as the explainer's `preprocessing` argument.
X_df = encoder.transform(X_df)

# Train / Test Split

In [None]:
# Keep 75% of the rows for training and hold out the rest for testing;
# the fixed random_state makes the split reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X_df,
    y_df,
    random_state=1,
    train_size=0.75,
)

# Model Fitting

In [None]:
# Gradient-boosted regressor predicting SalePrice.
# - 'reg:squarederror' is the current name of the deprecated 'reg:linear' objective.
# - `random_state` is the scikit-learn-API name for the random seed.
# - The model must be fitted here: SmartExplainer.compile() calls the model on
#   Xtest, which fails on an estimator that was only constructed.
regressor = xg.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=10,
    random_state=123,
).fit(Xtrain, ytrain)

# Understanding my model with shapash

## Declare and Compile SmartExplainer

In [None]:
from shapash import SmartExplainer

In [None]:
%%time
# Wrap the model in a Shapash explainer.
xpl = SmartExplainer(
    features_dict=house_dict,  # optional: labels used for the feature names
    preprocessing=encoder,     # optional: lets the compile step use inverse_transform
    model=regressor,
)

In [None]:
# Compile the explainer on the held-out features.
xpl.compile(
    y_target=ytest,  # optional: enables the True Values vs Predicted Values display
    x=Xtest,
)

In [None]:
# Start the Shapash web app for this explainer on port 8020, titled 'House Prices';
# the returned handle is kept so the app can be stopped afterwards.
app = xpl.run_app(title_story='House Prices', port=8020)

In [None]:
# Stop the web app started by run_app().
app.kill()

In [None]:
import sklearn
from sklearn import linear_model

In [None]:
%%time

# Second example: explain a scikit-learn linear model on the same data.
# `linear_model` is a module, not an estimator, so a LinearRegression is
# instantiated and fitted before it is handed to SmartExplainer. The target is
# flattened to 1-D because ytrain is a single-column DataFrame.
regressor = linear_model.LinearRegression().fit(Xtrain, ytrain.values.ravel())
from shapash import SmartExplainer
xpl = SmartExplainer(
    model=regressor,
    features_dict=house_dict,  # Optional parameter
    preprocessing=encoder,     # Optional: compile step can use inverse_transform method
    # `postprocessing=` is optional (see the Shapash postprocessing tutorial); it is
    # left out because no postprocessing dict is defined in this notebook.
)

In [None]:
%%time

# Compile the explainer on the held-out features.
# Only objects that exist in this notebook are passed:
# - `y_target` uses `ytest` (the original `yTest` was a typo and raised NameError).
# - `y_pred` is omitted, so Shapash falls back to model.predict.
# - `additional_data` / `additional_features_dict` are optional Webapp extras;
#   add them back once such a dataset and its label dict are defined.
xpl.compile(
    x=Xtest,
    y_target=ytest,  # Optional: allows to display True Values vs Predicted Values
)

In [None]:
%%time

# Start the Shapash web app for the current explainer with default settings;
# the returned handle is kept in `app`.
app = xpl.run_app()

In [None]:
%%time

# Write the HTML report for the compiled explainer.
# NOTE(review): both paths below are placeholders; point them at a real output
# location and an existing project-info YAML file before running.
xpl.generate_report(
    title_story="House prices report",
    title_description="""This document is a data science report of the kaggle house prices tutorial project.
        It was generated using the Shapash library.""",
    output_file='path/to/output/report.html',
    project_info_file='path/to/project_info.yml',
    # Data splits passed to the report.
    x_train=Xtrain,
    y_train=ytrain,
    y_test=ytest,
    # Metrics are given by name and dotted import path.
    metrics=[{'name': 'MSE', 'path': 'sklearn.metrics.mean_squared_error'}],
)