In [None]:
# Install Vectice (or upgrade it to the latest release) into the running kernel's environment.
# -q keeps pip's output quiet; -U upgrades an already-installed copy.
%pip install -q vectice -U

### Instructions

Paste your API token below and execute the cell. (Your token can be generated [here](https://app.vectice.com/account/api-keys).)

How to capture a model in Vectice for documentation

In [None]:
# Import the vectice package (and os, used to read the token from the environment)
import os

import vectice

# Connect using your API token - your token can be found here: https://app.vectice.com/account/api-keys
# The token is taken from the VECTICE_API_TOKEN environment variable when it is set, so the
# secret does not have to be saved inside the notebook. If the variable is not set, the
# placeholder below is used: replace 'YOUR API TOKEN' with your own token in that case.
conn = vectice.connect(
    api_token=os.environ.get('VECTICE_API_TOKEN', 'YOUR API TOKEN'),
    host='https://app.vectice.com',
    workspace='Samples'
)
# Alternate methods of connecting
# project = vectice.connect(config='~/.config/vectice-config.json')
# provided the json file contains the "WORKSPACE" and "PROJECT" entries,
# OR
# project = vectice.connect(config="<API_key_config_name>.json", workspace="ws_name", project="project_name")
# both will return a project reference

# Open the project
project = conn.project("How To: Reporting your Milestones")

### Generate a Random Forest model

In [None]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Load the iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# Transform data: one column per measurement plus the integer class label
data = pd.DataFrame({
    'sepal length': iris.data[:, 0],
    'sepal width': iris.data[:, 1],
    'petal length': iris.data[:, 2],
    'petal width': iris.data[:, 3],
    'species': iris.target
})

# Separate features/labels
X = data[['sepal length', 'sepal width', 'petal length', 'petal width']]
y = data['species']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split stable across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=44)

# Create a Random Forest classifier.
# random_state is fixed (same seed as the split) so that the accuracy, the plots and the
# metrics logged to Vectice are identical after every "Restart Kernel -> Run All".
clf = RandomForestClassifier(n_estimators=100, random_state=44)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Compute the accuracy once and reuse it for both the printout and the logged metrics
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
m = {'Accuracy': accuracy}

# Confusion Matrix (saved to disk so it can be attached to the model later)
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=clf.classes_)
disp.plot()
plt.tight_layout()
plt.savefig('Confusion_Matrix.png')
plt.show()

# Creating a bar plot of the feature importances, most important first.
# iris.feature_names is used for the labels; its order matches the column order of X
# because X was built from iris.data[:, 0] .. iris.data[:, 3].
feature_imp = pd.Series(clf.feature_importances_, index=iris.feature_names).sort_values(ascending=False)
sns.barplot(x=feature_imp, y=feature_imp.index)

# Add labels to your graph
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
plt.tight_layout()
plt.savefig('Feature_Importance.png')
plt.show()


### Document the base model in Vectice

In [None]:
# Document the model in Vectice.
# Select the "Assess model" step of an iteration of the "Logging a model" phase.
step = (
    project
    .phase("Logging a model")
    .iteration()
    .step("Assess model")
)

# Describe the base model together with the metrics dictionary `m`
base_model = vectice.Model(
    name="Iris Classification",
    library="scikit-learn",
    technique="RandomForestClassifier",
    metrics=m,
)

# Attach the model to the step's iteration, then close the step with a message
step.iteration.model = base_model
step.close(message="Base model attached.")

### Model Tuning

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

# Candidate hyper-parameter values sampled by the randomized search
param_grid = {
    'n_estimators': [25, 50, 100, 150],
    'max_features': ['sqrt', 'log2', None],
    'max_depth': [3, 6, 9],
    'max_leaf_nodes': [3, 6, 9],
}

# Fixed seed so that both the sampled candidates and the fitted forests are the same on
# every run; without it the selected model and the logged accuracy change between runs.
TUNING_SEED = 44

random_search = RandomizedSearchCV(RandomForestClassifier(random_state=TUNING_SEED),
                                   param_grid,
                                   random_state=TUNING_SEED)
random_search.fit(X_train, y_train)
print(random_search.best_estimator_)

# With the default refit=True, best_estimator_ has already been refit on all of X_train
# using the best parameters, so it is used directly instead of rebuilding and refitting
# an equivalent classifier by hand.
clf = random_search.best_estimator_
y_pred = clf.predict(X_test)
m = {'Accuracy': metrics.accuracy_score(y_test, y_pred)}

# Advanced Logging - Capture metrics, properties, model, and attachments
# NOTE(review): the two PNG files below are written by the base-model cell, so they show
# the base model's confusion matrix and feature importances, not the tuned model's.
# Regenerate them from the tuned `clf` before attaching if tuned-model plots are intended.
step = project.phase("Advanced Logging").iteration().step("Assess model")
step.iteration.model = vectice.Model(
    name="Iris Classification",
    library="scikit-learn",
    technique="RandomForestClassifier",
    attachments=['Confusion_Matrix.png', 'Feature_Importance.png'],
    properties=clf.get_params(),
    predictor=clf,
    metrics=m)
step.close(message="Model has been documented with advanced logging.")