TASK 1



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
df = pd.read_csv("/content/Dataset .csv")

In [None]:
columns_list=df.columns.tolist()

In [None]:
print("Columns in the dataset:")
print(columns_list)

Columns in the dataset:
['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes']


In [None]:
print(df.dtypes)

Restaurant ID             int64
Restaurant Name          object
Country Code              int64
City                     object
Address                  object
Locality                 object
Locality Verbose         object
Longitude               float64
Latitude                float64
Cuisines                 object
Average Cost for two      int64
Currency                 object
Has Table booking        object
Has Online delivery      object
Is delivering now        object
Switch to order menu     object
Price range               int64
Aggregate rating        float64
Rating color             object
Rating text              object
Votes                     int64
dtype: object


In [None]:
x=df.drop("Aggregate rating",axis=1)
y=df["Aggregate rating"]

In [None]:
non_numeric_columns=df.select_dtypes(exclude=['float64','int64']).columns
print("Non-numeric columns:",non_numeric_columns)

Non-numeric columns: Index(['Restaurant Name', 'City', 'Address', 'Locality', 'Locality Verbose',
       'Cuisines', 'Currency', 'Has Table booking', 'Has Online delivery',
       'Is delivering now', 'Switch to order menu', 'Rating color',
       'Rating text'],
      dtype='object')


In [None]:
df=df.drop('Cuisines',axis=1)

In [None]:
x_encoded=pd.get_dummies(x,drop_first=True)

In [None]:
x_train,x_test,y_train,y_test=train_test_split(x_encoded,y,test_size=0.2,random_state=42)

In [None]:
model=DecisionTreeRegressor()
model.fit(x_train,y_train)

In [None]:
y_pred=model.predict(x_test)

In [None]:
mse=mean_squared_error(y_test,y_pred)
r2=r2_score(y_test,y_pred)

print("Mean squared Error(MSE):",mse)
print("R-squared(R2):",r2)

Mean squared Error(MSE): 0.0489586603872318
R-squared(R2): 0.9784901950672517


In [None]:
importance =model.feature_importances_
feature_names=x_encoded.columns
feature_importance_df=pd.DataFrame({'Feature':feature_names,'Importance':importance})
sorted_features=feature_importance_df.sort_values(by='Importance',ascending=False)

In [None]:
print("\nMost Influential Features Task 1:")
print(sorted_features)


Most Influential Features Task 1:
                                                 Feature    Importance
20825                              Rating text_Not rated  8.966539e-01
20819                                Rating color_Orange  5.152611e-02
20826                                   Rating text_Poor  2.219755e-02
20824                                   Rating text_Good  1.308405e-02
20818                                 Rating color_Green  2.579984e-03
...                                                  ...           ...
20103                           Cuisines_Japanese, Steak -1.008831e-19
3198      Restaurant Name_Jaco's Bayfront Bar and Grille -2.017661e-19
2700                   Restaurant Name_Grand Madras Cafe -2.017661e-19
15969  Address_Shop 8, Mount Kailash Market, East of ... -2.017661e-19
17659                                 Locality_Vikaspuri -1.614129e-18

[20828 rows x 2 columns]


TASK 2

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

In [None]:
df = pd.read_csv("/content/Dataset .csv")

In [None]:
df['Cuisines']=df['Cuisines'].str.lower()

In [None]:
le=LabelEncoder()
df['Cuisine_Encoded']=le.fit_transform(df['Cuisines'])

In [None]:
x=df[['Cuisine_Encoded']]
y=df['Aggregate rating']

In [None]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [None]:
model=DecisionTreeRegressor()
model.fit(x_train,y_train)

In [None]:
y_pred=model.predict(x_test)

In [None]:
mse=mean_squared_error(y_test,y_pred)
r2=r2_score(y_test,y_pred)

print("Mean square Error(MSE):",mse)
print("R_square(R2):",r2)

Mean square Error(MSE): 1.941493078539205
R_square(R2): 0.1470122534531093


In [None]:
sample_user_preferences = 'Italian,Chinese'
preferences_list = [pref.strip().lower() for pref in sample_user_preferences.split(',')]

# Encode each preference separately
encoded_preferences = le.transform(preferences_list)

encoded_preferences_reshaped = encoded_preferences.reshape(-1, 1)

# Now you can use the encoded preferences for prediction
predicted_rating = model.predict(encoded_preferences_reshaped)[0]



In [None]:
print(f"predicted Rating for user perferences:{sample_user_preferences}:{predicted_rating:.2f}")

predicted Rating for user perferences:Italian,Chinese:3.54


Task 3

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [None]:
np.random.seed(42)
num_sample = 1000
num_features = 5

In [None]:
x = np.random.randn(num_sample, num_features)
y = np.random.randint(2, size=num_sample)

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5)
grid_search.fit(x_train, y_train)

In [None]:
best_logistic_model = grid_search.best_estimator_

In [None]:
y_pred = best_logistic_model.predict(x_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
confusion = confusion_matrix(y_test, y_pred)

In [None]:
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("Confusion Matrix:\n", confusion)

Accuracy: 0.48
Precision: 0.47259467040673214
Recall: 0.48
F1-score: 0.46049999999999996
Confusion Matrix:
 [[28 70]
 [34 68]]


TASK 4

In [None]:
import pandas as pd
import folium
from folium import plugins

In [None]:
df=pd.read_csv("/content/Dataset .csv")

In [None]:
map_center=[df['Latitude'].mean(),df['Longitude'].mean()]

In [None]:
map_restaurants=folium.Map(location=map_center,zoom_start=12)

In [None]:
for index,row in df.iterrows():
  popup_text=f"Restaurant: {row['Restaurant Name']}<br>Location: {row['Locality']},{row['City']}<br>Cuisines:{row['Cuisines']}<br>Rating:{row['Aggregate rating']}"
  folium.Marker([row['Latitude'],row['Longitude']],popup=popup_text).add_to(map_restaurants)

In [None]:
map_restaurants.save("restaurant_map.html")

In [None]:
restaurant_count_by_city=df.groupby('City').size().reset_index(name='Restaurant Count')

In [None]:
print(restaurant_count_by_city)

In [None]:
average_rating_by_city=df.groupby('City')['Aggregate rating'].mean().reset_index(name='Average Rating')

In [None]:
print(average_rating_by_city)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
sns.boxplot(x='City',y='Aggregate rating',data=df)
plt.xticks(rotation=90)
plt.title('Distribution of Rating by City')
plt.show()