In [24]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import OneHotEncoder , RobustScaler
from category_encoders import BinaryEncoder
from sklearn.linear_model import LogisticRegression

In [25]:
# Load the modelling table (presumably the output of an earlier EDA step — confirm).
# Its `Target` column is the label that the cells below split off from the features.
df = pd.read_csv("EDA_OUT.csv")
df

Unnamed: 0,online_order,book_table,votes,location,approx_cost(for two people),listed_in(type),listed_in(city),cuisines_counts,rest_type_counts,Target
0,Yes,Yes,775,Banashankari,800.0,Buffet,Banashankari,3,1,1
1,Yes,No,787,Banashankari,800.0,Buffet,Banashankari,3,1,1
2,Yes,No,918,Banashankari,800.0,Buffet,Banashankari,3,2,1
3,No,No,88,Banashankari,300.0,Buffet,Banashankari,2,1,0
4,No,No,166,Basavanagudi,600.0,Buffet,Banashankari,2,1,1
...,...,...,...,...,...,...,...,...,...,...
41185,No,No,34,Whitefield,800.0,Pubs and bars,Whitefield,2,2,0
41186,No,No,81,Whitefield,800.0,Pubs and bars,Whitefield,4,2,0
41187,No,No,27,Whitefield,1500.0,Pubs and bars,Whitefield,1,1,0
41188,No,Yes,236,other,2500.0,Pubs and bars,Whitefield,1,1,1


In [26]:
# Column-wise categorical encoding shared by every pipeline in this notebook.
Encoder = ColumnTransformer(
    transformers=[
        # Yes/No flags and the listing type: one-hot, dropping the first level of each.
        (
            "OHE",
            # NOTE: `sparse=` is the older scikit-learn spelling of this option; kept
            # because the pipeline repr recorded later in this notebook shows that
            # the environment it was run in accepts it.
            OneHotEncoder(sparse=False, drop="first"),
            ["online_order", "book_table", "listed_in(type)"],
        ),
        # The two area columns: binary-encoded.
        (
            "BE",
            BinaryEncoder(),
            ["location", "listed_in(city)"],
        ),
    ],
    # Every column not named above is forwarded unchanged.
    remainder="passthrough",
)

In [27]:
# Baseline pipeline: encode -> robust-scale -> logistic regression.
steps = [
    ("Encoder", Encoder),
    ("Scaler", RobustScaler()),
    ("Model", LogisticRegression()),
]
pipeline = Pipeline(steps=steps)

In [28]:
# Features: every column of `df` except the label.
x = df.drop(columns="Target")
# Label: the `Target` column on its own.
y = df.loc[:, "Target"]

In [29]:
# 5-fold cross-validated accuracy of the baseline pipeline; train scores are
# kept as well so the two can be compared in the next cells.
results = cross_validate(
    estimator=pipeline,
    X=x,
    y=y,
    scoring="accuracy",
    cv=5,
    return_train_score=True,
)

In [30]:
# Training accuracy averaged over the cross-validation folds.
np.mean(results["train_score"])

0.7793942704539938

In [31]:
# Held-out accuracy averaged over the cross-validation folds.
np.mean(results["test_score"])

0.7718135469774217

In [32]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [33]:
# Candidate classifiers as (short name, unfitted estimator) pairs, all with
# default hyper-parameters. Each pair is later used directly as a Pipeline step.
models = [
    ("LR", LogisticRegression()),
    ("KNN", KNeighborsClassifier()),
    ("CART", DecisionTreeClassifier()),
    ("RF", RandomForestClassifier()),
    ("xg", XGBClassifier()),
]

In [34]:
# Compare every candidate under the same preprocessing and the same 5-fold CV.
for model in models:
    # `model` is already a (name, estimator) pair, i.e. a ready-made final step.
    steps = [("Encoder", Encoder), ("Scaler", RobustScaler()), model]
    pipeline = Pipeline(steps=steps)
    scores = cross_validate(
        pipeline,
        x,
        y,
        cv=5,
        scoring="accuracy",
        return_train_score=True,
    )

    # Report: model name, then fold-averaged train and test accuracy.
    print(model[0])
    print("Train_accuracy", np.mean(scores["train_score"]))
    print("----------")
    print("Test_accuracy", np.mean(scores["test_score"]))
    print("--------------------")
    print("\n")
    

LR
Train_accuracy 0.7793942704539938
----------
Test_accuracy 0.7718135469774217
--------------------


KNN
Train_accuracy 0.8609796067006557
----------
Test_accuracy 0.7890507404709881
--------------------


CART
Train_accuracy 0.998913571255159
----------
Test_accuracy 0.8907987375576598
--------------------


RF
Train_accuracy 0.9989014323865015
----------
Test_accuracy 0.878150036416606
--------------------


xg
Train_accuracy 0.9194950230638504
----------
Test_accuracy 0.8614712308812817
--------------------




In [35]:
from sklearn.model_selection import GridSearchCV

In [36]:
# NOTE(review): scratch cell — builds a default RandomForestClassifier without
# assigning it, so its only effect is to display the estimator's repr.
RandomForestClassifier()

In [37]:
# Search space for the grid search. The "Model__" prefix routes each entry to
# the pipeline step named "Model".
params = dict(
    Model__learning_rate=[0.01, 0.1, 0.2],
    Model__n_estimators=[110, 120, 130],  # Number of trees (boosting rounds)
    Model__reg_alpha=[0, 0.1, 0.5],
)


In [38]:
# Pipeline to be tuned: same preprocessing, with XGBoost as the final "Model" step.
steps = [
    ("Encoder", Encoder),
    ("Scaler", RobustScaler()),
    ("Model", XGBClassifier()),
]
pipeline = Pipeline(steps=steps)

In [39]:
# Exhaustive search over `params` with 10-fold CV, scored by accuracy.
# n_jobs=-1 uses every available core; train scores are recorded alongside
# the validation scores.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=params,
    scoring="accuracy",
    cv=10,
    n_jobs=-1,
    return_train_score=True,
)
grid_search.fit(x, y)

In [40]:
# Parameter combination that achieved the best mean cross-validated accuracy.
grid_search.best_params_

{'Model__learning_rate': 0.2,
 'Model__n_estimators': 130,
 'Model__reg_alpha': 0.5}

In [41]:
# NOTE(review): `mean_train_score` holds one entry per parameter combination, so
# this figure is the average over the WHOLE grid, not the training accuracy of
# the selected model. For that, index with `grid_search.best_index_`.
np.mean(grid_search.cv_results_["mean_train_score"])

0.8605700572574949

In [42]:
# NOTE(review): `mean_test_score` holds one entry per parameter combination, so
# this figure is the average over the WHOLE grid, not the validation accuracy of
# the selected model. For that, see `grid_search.best_score_`.
np.mean(grid_search.cv_results_["mean_test_score"])

0.8159549692931584

In [43]:
# Keep the winning pipeline. The search above does not pass `refit`, so
# GridSearchCV's default applies and this estimator has been refit on all of x, y.
final_model = grid_search.best_estimator_

In [44]:
import joblib

In [45]:
# Persist the two artifacts the Streamlit app written later in this notebook
# loads: the fitted pipeline, and the training column names in their order.
joblib.dump(final_model , "Model.pkl")
joblib.dump(x.columns , "Inputs.pkl")

['Inputs.pkl']

In [40]:
# NOTE(review): with no path argument, to_csv() returns the CSV text instead of
# writing a file — this looks like a leftover; confirm before removing.
x.to_csv()

Unnamed: 0,online_order,book_table,votes,location,approx_cost(for two people),listed_in(type),listed_in(city),cuisines_counts,rest_type_counts
0,Yes,Yes,775,Banashankari,800.0,Buffet,Banashankari,3,1
1,Yes,No,787,Banashankari,800.0,Buffet,Banashankari,3,1
2,Yes,No,918,Banashankari,800.0,Buffet,Banashankari,3,2
3,No,No,88,Banashankari,300.0,Buffet,Banashankari,2,1
4,No,No,166,Basavanagudi,600.0,Buffet,Banashankari,2,1
...,...,...,...,...,...,...,...,...,...
41185,No,No,34,Whitefield,800.0,Pubs and bars,Whitefield,2,2
41186,No,No,81,Whitefield,800.0,Pubs and bars,Whitefield,4,2
41187,No,No,27,Whitefield,1500.0,Pubs and bars,Whitefield,1,1
41188,No,Yes,236,other,2500.0,Pubs and bars,Whitefield,1,1


In [54]:
# Display the feature frame (everything in `df` except `Target`).
x

Unnamed: 0,online_order,book_table,votes,location,approx_cost(for two people),listed_in(type),listed_in(city),cuisines_counts,rest_type_counts
0,Yes,Yes,775,Banashankari,800.0,Buffet,Banashankari,3,1
1,Yes,No,787,Banashankari,800.0,Buffet,Banashankari,3,1
2,Yes,No,918,Banashankari,800.0,Buffet,Banashankari,3,2
3,No,No,88,Banashankari,300.0,Buffet,Banashankari,2,1
4,No,No,166,Basavanagudi,600.0,Buffet,Banashankari,2,1
...,...,...,...,...,...,...,...,...,...
41185,No,No,34,Whitefield,800.0,Pubs and bars,Whitefield,2,2
41186,No,No,81,Whitefield,800.0,Pubs and bars,Whitefield,4,2
41187,No,No,27,Whitefield,1500.0,Pubs and bars,Whitefield,1,1
41188,No,Yes,236,other,2500.0,Pubs and bars,Whitefield,1,1


In [43]:
# Distinct `location` values; the app's "location" selectbox offers these same options.
x["location"].unique()

array(['Banashankari', 'Basavanagudi', 'other', 'Jayanagar', 'JP Nagar',
       'Bannerghatta Road', 'BTM', 'Electronic City', 'Shanti Nagar',
       'Koramangala 5th Block', 'Richmond Road', 'HSR',
       'Koramangala 7th Block', 'Bellandur', 'Sarjapur Road',
       'Marathahalli', 'Whitefield', 'Old Airport Road', 'Indiranagar',
       'Koramangala 1st Block', 'Frazer Town', 'MG Road', 'Brigade Road',
       'Lavelle Road', 'Church Street', 'Ulsoor', 'Residency Road',
       'Shivajinagar', 'St. Marks Road', 'Cunningham Road',
       'Commercial Street', 'Vasanth Nagar', 'Domlur',
       'Koramangala 8th Block', 'Ejipura', 'Jeevan Bhima Nagar',
       'Kammanahalli', 'Koramangala 6th Block', 'Brookefield',
       'Koramangala 4th Block', 'Banaswadi', 'Kalyan Nagar',
       'Malleshwaram', 'Rajajinagar', 'New BEL Road'], dtype=object)

In [47]:
# NOTE(review): no call parentheses — this only displays the bound method (and
# with it the pipeline's repr); it does not run a prediction.
final_model.predict

<bound method Pipeline.predict of Pipeline(steps=[('Encoder',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('OHE',
                                                  OneHotEncoder(drop='first',
                                                                sparse=False),
                                                  ['online_order', 'book_table',
                                                   'listed_in(type)']),
                                                 ('BE', BinaryEncoder(),
                                                  ['location',
                                                   'listed_in(city)'])])),
                ('Scaler', RobustScaler()),
                ('Model',
                 XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
                               colsample_bylevel...
                               gamma=0, gpu_id=-1, grow_policy='depthwise',
                     

In [61]:
# Peek at one randomly chosen feature row (unseeded, so it differs on every run).
x.sample(1)

Unnamed: 0,online_order,book_table,votes,location,approx_cost(for two people),listed_in(type),listed_in(city),cuisines_counts,rest_type_counts
17740,Yes,No,23,BTM,600.0,Delivery,JP Nagar,3,2


In [65]:
# Smoke test: predict one random row and unwrap the single result with [0].
# The row comes from `x`, the data the model was fit on, so this checks that the
# pipeline runs end to end — it is not an accuracy estimate.
final_model.predict(x.sample(1))[0]

0

In [70]:
%%writefile app_3rd.py
import streamlit as st
import pandas as pd
import joblib


# Artifacts loaded when the script starts: the training column names/order and
# the fitted prediction pipeline.
# NOTE: joblib.load unpickles arbitrary Python objects — only load files that
# you produced yourself.
Inputs = joblib.load("Inputs.pkl")
Model = joblib.load("Model.pkl")

def prediction(online_order, book_table, votes, location,approx_cost,listed_in,listed_in_city,cuisines_counts,rest_type_counts):
    """Predict the class of a single restaurant described by the form values.

    The arguments are placed into a one-row DataFrame whose columns follow
    ``Inputs`` (the column order saved at training time), the row is echoed to
    the page with ``st.dataframe``, and the loaded pipeline ``Model`` is asked
    for a prediction.

    Parameters
    ----------
    online_order, book_table : str
        "Yes" / "No" flags.
    votes : int
    location : str
    approx_cost : int or float
        Stored under the training column "approx_cost(for two people)".
    listed_in : str
        Stored under "listed_in(type)".
    listed_in_city : str
        Stored under "listed_in(city)".
    cuisines_counts, rest_type_counts : int

    Returns
    -------
    int
        The predicted class for the row, as a plain Python int.
    """
    row = {
        "online_order": online_order,
        "book_table": book_table,
        "votes": votes,
        "location": location,
        "rest_type_counts": rest_type_counts,
        "approx_cost(for two people)": approx_cost,
        "cuisines_counts": cuisines_counts,
        "listed_in(type)": listed_in,
        "listed_in(city)": listed_in_city,
    }
    # Build the row in one constructor call so each column gets a dtype inferred
    # from its value. Filling an empty frame cell by cell leaves every column as
    # `object`, numeric ones included. `columns=Inputs` enforces the training
    # column order; a column in Inputs with no entry in `row` comes out as NaN.
    test_df = pd.DataFrame([row], columns=Inputs)
    st.dataframe(test_df)
    # predict() returns an array with one element per row; unwrap the only one
    # and normalise it to a built-in int so it is safe to use as a list index.
    return int(Model.predict(test_df)[0])


    
def main():
    """Render the input form and, when "predict" is pressed, show the verdict.

    Nine widgets collect the restaurant's attributes. On button press they are
    passed to ``prediction`` and the result is shown as "Fail" (class 0) or
    "Success" (class 1).
    """
    # Choices offered by the categorical selectboxes.
    locations = [
        'Banashankari', 'Basavanagudi', 'other', 'Jayanagar', 'JP Nagar',
        'Bannerghatta Road', 'BTM', 'Electronic City', 'Shanti Nagar',
        'Koramangala 5th Block', 'Richmond Road', 'HSR',
        'Koramangala 7th Block', 'Bellandur', 'Sarjapur Road',
        'Marathahalli', 'Whitefield', 'Old Airport Road', 'Indiranagar',
        'Koramangala 1st Block', 'Frazer Town', 'MG Road', 'Brigade Road',
        'Lavelle Road', 'Church Street', 'Ulsoor', 'Residency Road',
        'Shivajinagar', 'St. Marks Road', 'Cunningham Road',
        'Commercial Street', 'Vasanth Nagar', 'Domlur',
        'Koramangala 8th Block', 'Ejipura', 'Jeevan Bhima Nagar',
        'Kammanahalli', 'Koramangala 6th Block', 'Brookefield',
        'Koramangala 4th Block', 'Banaswadi', 'Kalyan Nagar',
        'Malleshwaram', 'Rajajinagar', 'New BEL Road',
    ]
    listing_types = [
        'Buffet', 'Cafes', 'Delivery', 'Desserts', 'Dine-out',
        'Drinks & nightlife', 'Pubs and bars',
    ]
    listing_cities = [
        'Banashankari', 'Bannerghatta Road', 'Basavanagudi', 'Bellandur',
        'Brigade Road', 'Brookefield', 'BTM', 'Church Street',
        'Electronic City', 'Frazer Town', 'HSR', 'Indiranagar',
        'Jayanagar', 'JP Nagar', 'Kalyan Nagar', 'Kammanahalli',
        'Koramangala 4th Block', 'Koramangala 5th Block',
        'Koramangala 6th Block', 'Koramangala 7th Block', 'Lavelle Road',
        'Malleshwaram', 'Marathahalli', 'MG Road', 'New BEL Road',
        'Old Airport Road', 'Rajajinagar', 'Residency Road',
        'Sarjapur Road', 'Whitefield',
    ]

    # Widgets are created in the same order as before, so the page layout is unchanged.
    st.title("Bangolre Resturants")
    online_order = st.selectbox("online" , ['Yes', 'No'])
    book_table = st.selectbox("book_table" , ['Yes', 'No'])
    votes = st.slider("votes" , min_value= 0 , max_value=16832 , value=0,step=1)
    location = st.selectbox("location" , locations)
    rest_type_counts = st.selectbox("rest_type_counts" ,[1,2])
    # The default must lie inside [min_value, max_value]. It used to be 0, which
    # is below the declared minimum of 40 and so did not respect the slider's
    # own lower bound.
    approx_cost = st.slider("approx_cost(for two people)" , min_value=40, max_value=6000, value=40, step=1)
    cuisines_counts = st.selectbox("cuisines_numbers" , [3, 2, 1, 4, 5, 8, 7, 6])
    listed_in = st.selectbox("listed_in(type)" , listing_types)
    listed_in_city = st.selectbox("listed_in(city)" , listing_cities)

    if st.button("predict"):
        result = prediction(online_order, book_table, votes, location,approx_cost,listed_in,listed_in_city,cuisines_counts,rest_type_counts)
        # The predicted class is used as an index: 0 -> "Fail", 1 -> "Success".
        label = ["Fail" , "Success"]
        st.text(f"The Resturant will {label[result]}")


if __name__ == '__main__':
    main()
    

Overwriting app_3rd.py


In [23]:
# Print the pandas version of this kernel — presumably to match it with the
# environment that will unpickle Model.pkl / Inputs.pkl; TODO confirm.
import pandas as pd
print(pd.__version__)


1.4.3
