In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from joblib import load, dump
from sklearn.datasets import load_wine
import plotly.express as px

In [8]:
# Load the wine dataset as pandas objects: X is the feature frame, y the target series.
wine = load_wine(as_frame=True)
X = wine.data
y = wine.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print(f'X train shape = {X_train.shape}')
print(f'X test shape = {X_test.shape}')

X train shape = (142, 13)
X test shape = (36, 13)


In [10]:
# Display settings (cosmetic only).
# Use the dark theme for every plotly express figure created below.
px.defaults.template = 'plotly_dark'
# Print NumPy arrays with two digits after the decimal point.
np.set_printoptions(precision=2)

Create and fit the pipeline

In [12]:
# Standardize the features, then classify with a seeded random forest.
# `fit` returns the pipeline itself, so construction and training are chained.
pipeline = make_pipeline(
    StandardScaler(),
    RandomForestClassifier(random_state=42),
).fit(X_train, y_train)

# Display the fitted pipeline as the cell output.
pipeline

In [15]:
# Distinct class labels present in the target.
pd.unique(y)

array([0, 1, 2])

In [16]:
# Accuracy on the rows the pipeline was fitted on.
y_train_pred = pipeline.predict(X_train)
training_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)

# Accuracy on the held-out rows.
y_test_pred = pipeline.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)

print(f'training accuracy = {training_accuracy}')
print(f'test accuracy = {test_accuracy}')

training accuracy = 1.0
test accuracy = 1.0


Cross-validation

In [18]:
# 5-fold cross-validation of the whole pipeline (scaler + forest) on the full dataset.
cv_scores = cross_val_score(estimator=pipeline, X=X, y=y, cv=5)

# Per-fold scores, then their mean and standard deviation.
print(f'cv scores = {cv_scores}')
print(f'mean score = {cv_scores.mean():.2f}')
print(f'+/- = {cv_scores.std():.2f}')

cv scores = [0.97 0.94 0.97 1.   1.  ]
mean score = 0.98
+/- = 0.02


In [20]:
# Read the importances from the fitted forest; 'randomforestclassifier' is the
# step name make_pipeline derives from the estimator's class name.
feature_importance = pipeline.named_steps['randomforestclassifier'].feature_importances_
features = X.columns

# Sort ascending so the bars are drawn in order of increasing importance.
indices = np.argsort(feature_importance)

# Horizontal bar graph. With array inputs plotly express names the axes "x" and
# "y", so give them meaningful titles explicitly.
fig = px.bar(
    x=feature_importance[indices],
    y=features[indices],
    title='Feature Importance',
    orientation='h',
    labels={'x': 'importance', 'y': 'feature'},
)
fig.show()

In [21]:
# Persist the fitted pipeline (scaler + forest) so it can be reloaded for serving.
dump(value=pipeline, filename='model.joblib')

['model.joblib']

In [22]:
!pip install gradio

Installing collected packages: pydub, ffmpy, websockets, uvloop, ujson, tomlkit, semantic-version, ruff, python-multipart, python-dotenv, orjson, httptools, h11, dnspython, aiofiles, watchfiles, uvicorn, starlette, httpcore, email_validator, httpx, gradio-client, fastapi-cli, fastapi, gradio
Successfully installed aiofiles-23.2.1 dnspython-2.6.1 email_validator-2.2.0 fastapi-0.111.0 fastapi-cli-0.0.4 ffmpy-0.3.2 gradio-4.37.2 gradio-client-1.0.2 h11-0.14.0 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 orjson-3.10.6 pydub-0.25.1 python-dotenv-1.0.1 python-multipart-0.0.9 ruff-0.5.0 semantic-version-2.10.0 starlette-0.37.2 tomlkit-0.12.0 ujson-5.10.0 uvicorn-0.30.1 uvloop-0.19.0 watchfiles-0.22.0 websockets-11.0.3


In [33]:
import gradio as gr

# Loaded models keyed by file path, so each file is read from disk only once.
_MODEL_CACHE = {}


def _get_model(path='model.joblib'):
    """Return the model stored at ``path``, loading it on first use only."""
    if path not in _MODEL_CACHE:
        _MODEL_CACHE[path] = load(path)
    return _MODEL_CACHE[path]


def predict(*features):
    """Predict the wine class name for one sample.

    Parameters
    ----------
    *features
        One numeric value per column of ``X``, in column order.

    Returns
    -------
    str
        The entry of ``wine.target_names`` for the predicted class.

    Raises
    ------
    gr.Error
        If any value is missing (an empty number field arrives as ``None``).

    Notes
    -----
    The saved model is loaded once and reused across calls; re-run this cell
    after overwriting ``model.joblib`` to pick up the new file.
    """
    # Fail with a readable message instead of an opaque model error on missing input.
    if any(value is None for value in features):
        raise gr.Error('Please provide a value for every feature.')

    cols = X.columns
    # Single-row frame with the training column names, so the model sees the
    # same feature names it was fitted with.
    sample = pd.DataFrame([features], columns=cols)
    result = _get_model().predict(sample)
    # Convert the NumPy string scalar into a plain Python string for the text output.
    return str(wine.target_names[result[0]])

# Build the demo: one numeric input per dataset column (in column order, which
# is the order `predict` receives them) and a plain-text output.
ui = gr.Interface(
    fn=predict,
    inputs=[gr.Number(label=column) for column in X.columns],
    outputs='text',
    # Three dataset rows offered as ready-made examples.
    examples=[X.iloc[row].tolist() for row in (0, 100, 160)],
)

ui.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://68410add42f7b0812a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


