In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
import joblib


In [None]:

# Remote locations of the restaurant table (CSV) and the country-code lookup
# sheet (Excel).
url_zomato = "https://github.com/dsrscientist/dataset4/blob/main/zomato.csv?raw=true"
url_country = "https://github.com/FlipRoboTechnologies/ML_-Datasets/blob/main/Z_Restaurant/Country-Code.xlsx?raw=true"

# read_csv decodes as UTF-8 by default and raises UnicodeDecodeError on the
# first byte sequence that is not valid UTF-8, which would stop the notebook
# here. Retry with Latin-1 (which accepts every byte value) only when the
# default decode fails, so a UTF-8 file is still read exactly as before.
# NOTE(review): the actual encoding of this file has not been verified — confirm.
try:
    df_zomato = pd.read_csv(url_zomato)
except UnicodeDecodeError:
    df_zomato = pd.read_csv(url_zomato, encoding="latin-1")

df_country = pd.read_excel(url_country)

In [None]:

# Left join on the shared 'Country Code' key: every restaurant row is kept and
# gains the lookup sheet's columns where a matching code exists.
df = df_zomato.merge(df_country, how='left', on='Country Code')

In [None]:

# Preview the first five rows of the merged frame.
df.head(n=5)


In [None]:

# Summary statistics of the merged frame.
print(df.describe())

# DataFrame.info() writes its report directly to stdout and returns None, so
# it is called bare; wrapping it in print() would add a stray "None" line.
df.info()

# Count of missing values per column.
print(df.isnull().sum())

In [None]:

# Draw the frame's histogram grid on a 12x10 inch canvas, then render it.
hist_axes = df.hist(figsize=(12, 10))
plt.show()


In [None]:

# Annotated correlation heatmap of the numeric columns.
plt.figure(figsize=(10, 8))

# The object (text) columns are only label-encoded in a later cell, so the
# frame is still mixed-dtype here. pandas >= 2.0 raises when corr() meets a
# non-numeric column, hence the explicit numeric_only=True.
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.show()

In [None]:

# Remove exact duplicate rows. Reassigning (instead of inplace=True) yields the
# same frame under the same name.
df = df.drop_duplicates()

# Impute missing numeric values with the column mean. numeric_only=True is
# required because the frame still contains object columns at this point and
# pandas >= 2.0 raises when asked for the mean of a non-numeric column.
# Missing values in non-numeric columns are left untouched by this step.
df = df.fillna(df.mean(numeric_only=True))


In [None]:

# Replace every object-dtype column with integer codes.
categorical_features = df.select_dtypes(include=['object']).columns

# Keep each fitted encoder, keyed by column name, so the integer codes can be
# mapped back to labels (inverse_transform) or re-applied to new data.
label_encoders = {}
for feature in categorical_features:
    encoder = LabelEncoder()
    # Cast to str first: an object column that mixes strings with NaN (float)
    # makes LabelEncoder raise, whereas after the cast a missing value simply
    # becomes its own "nan" category. Pure-string columns are unaffected.
    df[feature] = encoder.fit_transform(df[feature].astype(str))
    label_encoders[feature] = encoder

In [None]:

# The two prediction targets; both are removed from the feature matrix so
# neither one is available as an input when predicting the other.
target_columns = ['Average Cost for two', 'Price range']
y_cost, y_price = df[target_columns[0]], df[target_columns[1]]
X = df.drop(columns=target_columns)

In [None]:

# Both targets are split with identical settings (20% hold-out, fixed seed).
split_options = dict(test_size=0.2, random_state=42)

X_train_cost, X_test_cost, y_train_cost, y_test_cost = train_test_split(
    X, y_cost, **split_options
)
X_train_price, X_test_price, y_train_price, y_test_price = train_test_split(
    X, y_price, **split_options
)

In [None]:

# Standardize the features. For each target the scaler is fitted on the
# training split and the same fitted statistics are then applied to the test
# split. The single scaler object is refitted for the price split, so after
# this cell it holds the statistics of X_train_price.
scaler = StandardScaler()

# Tuple elements are evaluated left to right: fit on train, then transform test.
X_train_cost, X_test_cost = (
    scaler.fit_transform(X_train_cost),
    scaler.transform(X_test_cost),
)
X_train_price, X_test_price = (
    scaler.fit_transform(X_train_price),
    scaler.transform(X_test_price),
)


In [None]:

# Linear regression on the cost target.
linear_model_cost = LinearRegression().fit(X_train_cost, y_train_cost)
y_pred_lin_cost = linear_model_cost.predict(X_test_cost)

# Score the held-out predictions, then report each metric.
mae_lin_cost = mean_absolute_error(y_test_cost, y_pred_lin_cost)
mse_lin_cost = mean_squared_error(y_test_cost, y_pred_lin_cost)
r2_lin_cost = r2_score(y_test_cost, y_pred_lin_cost)

print("Linear Regression (Cost) - MAE:", mae_lin_cost)
print("Linear Regression (Cost) - MSE:", mse_lin_cost)
print("Linear Regression (Cost) - R2 Score:", r2_lin_cost)

In [None]:

# Random forest regressor on the cost target.
# A random forest draws bootstrap samples and feature subsets at random, so
# without a seed every run produces different metrics and a different model
# (this model is also the one persisted at the end of the notebook). The seed
# matches the one used for the train/test split.
forest_model_cost = RandomForestRegressor(random_state=42)
forest_model_cost.fit(X_train_cost, y_train_cost)
y_pred_forest_cost = forest_model_cost.predict(X_test_cost)

# Report error metrics on the held-out split.
print("Random Forest (Cost) - MAE:", mean_absolute_error(y_test_cost, y_pred_forest_cost))
print("Random Forest (Cost) - MSE:", mean_squared_error(y_test_cost, y_pred_forest_cost))
print("Random Forest (Cost) - R2 Score:", r2_score(y_test_cost, y_pred_forest_cost))

In [None]:

# Gradient-boosted trees (XGBoost, default hyperparameters) on the cost target.
xgb_model_cost = XGBRegressor().fit(X_train_cost, y_train_cost)
y_pred_xgb_cost = xgb_model_cost.predict(X_test_cost)

# Score the held-out predictions, then report each metric.
mae_xgb_cost = mean_absolute_error(y_test_cost, y_pred_xgb_cost)
mse_xgb_cost = mean_squared_error(y_test_cost, y_pred_xgb_cost)
r2_xgb_cost = r2_score(y_test_cost, y_pred_xgb_cost)

print("XGBoost (Cost) - MAE:", mae_xgb_cost)
print("XGBoost (Cost) - MSE:", mse_xgb_cost)
print("XGBoost (Cost) - R2 Score:", r2_xgb_cost)


In [None]:

# Linear regression on the price-range target.
linear_model_price = LinearRegression().fit(X_train_price, y_train_price)
y_pred_lin_price = linear_model_price.predict(X_test_price)

# Score the held-out predictions, then report each metric.
mae_lin_price = mean_absolute_error(y_test_price, y_pred_lin_price)
mse_lin_price = mean_squared_error(y_test_price, y_pred_lin_price)
r2_lin_price = r2_score(y_test_price, y_pred_lin_price)

print("Linear Regression (Price) - MAE:", mae_lin_price)
print("Linear Regression (Price) - MSE:", mse_lin_price)
print("Linear Regression (Price) - R2 Score:", r2_lin_price)

In [None]:

# Random forest regressor on the price-range target.
# A random forest draws bootstrap samples and feature subsets at random, so
# without a seed every run produces different metrics. The seed matches the
# one used for the train/test split.
forest_model_price = RandomForestRegressor(random_state=42)
forest_model_price.fit(X_train_price, y_train_price)
y_pred_forest_price = forest_model_price.predict(X_test_price)

# Report error metrics on the held-out split.
print("Random Forest (Price) - MAE:", mean_absolute_error(y_test_price, y_pred_forest_price))
print("Random Forest (Price) - MSE:", mean_squared_error(y_test_price, y_pred_forest_price))
print("Random Forest (Price) - R2 Score:", r2_score(y_test_price, y_pred_forest_price))

In [None]:

# Gradient-boosted trees (XGBoost, default hyperparameters) on the price-range
# target.
xgb_model_price = XGBRegressor().fit(X_train_price, y_train_price)
y_pred_xgb_price = xgb_model_price.predict(X_test_price)

# Score the held-out predictions, then report each metric.
mae_xgb_price = mean_absolute_error(y_test_price, y_pred_xgb_price)
mse_xgb_price = mean_squared_error(y_test_price, y_pred_xgb_price)
r2_xgb_price = r2_score(y_test_price, y_pred_xgb_price)

print("XGBoost (Price) - MAE:", mae_xgb_price)
print("XGBoost (Price) - MSE:", mse_xgb_price)
print("XGBoost (Price) - R2 Score:", r2_xgb_price)


In [None]:

# Persist the artifacts needed to score new data.
# Both models were trained on features standardized by `scaler`, so the fitted
# scaler is saved alongside them; without it the pickled models cannot be fed
# correctly scaled inputs at inference time.
joblib.dump(scaler, 'zomato_feature_scaler_final.pkl')

# Chosen models: random forest for cost, XGBoost for price range.
joblib.dump(forest_model_cost, 'best_zomato_cost_model_final.pkl')
joblib.dump(xgb_model_price, 'best_zomato_price_model_final.pkl')
