# TASK 1

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error,r2_score

In [2]:
df=pd.read_csv("C:\\Users\\umair\\OneDrive\\Documents\\Dataset .csv")

In [3]:
column_list = df.columns.tolist()

In [4]:
print("Column in the dataset:")
print(column_list)

Column in the dataset:
['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes']


In [5]:
print(df.dtypes)

Restaurant ID             int64
Restaurant Name          object
Country Code              int64
City                     object
Address                  object
Locality                 object
Locality Verbose         object
Longitude               float64
Latitude                float64
Cuisines                 object
Average Cost for two      int64
Currency                 object
Has Table booking        object
Has Online delivery      object
Is delivering now        object
Switch to order menu     object
Price range               int64
Aggregate rating        float64
Rating color             object
Rating text              object
Votes                     int64
dtype: object


In [6]:
X = df.drop("Aggregate rating",axis=1)
Y = df["Aggregate rating"]

In [7]:
non_numeric_columns = df.select_dtypes(exclude=['float64','int64']).columns
print('Non-numeric columns',non_numeric_columns)

Non-numeric columns Index(['Restaurant Name', 'City', 'Address', 'Locality', 'Locality Verbose',
       'Cuisines', 'Currency', 'Has Table booking', 'Has Online delivery',
       'Is delivering now', 'Switch to order menu', 'Rating color',
       'Rating text'],
      dtype='object')


In [8]:
df=df.drop('Cuisines',axis=1)

In [9]:
X_encoded = pd.get_dummies(X,drop_first=True)

In [10]:
X_train,X_test,Y_train,Y_test = train_test_split(X_encoded,Y,test_size=0.2,random_state=42)

In [11]:
model= DecisionTreeRegressor()
model.fit(X_train,Y_train)

DecisionTreeRegressor()

In [12]:
Y_pred = model.predict(X_test)

In [13]:
mse = mean_squared_error(Y_test,Y_pred)
r2 = r2_score(Y_test,Y_pred)
print("Mean Squared Error (MSE):",mse)
print("R-squared (R2):",r2)

Mean Squared Error (MSE): 0.04961276818419674
R-squared (R2): 0.978202815245042


In [14]:
importance = model.feature_importances_
feature_names = X_encoded.columns
feature_importance_df=pd.DataFrame({'Feature':feature_names, 'Importance':importance})
sorted_features = feature_importance_df.sort_values(by='Importance',ascending=False)
print("\nMost Influential Features:")
print(sorted_features)


Most Influential Features:
                                                Feature    Importance
20821                                Rating color_White  8.966539e-01
20819                               Rating color_Orange  5.152611e-02
20820                                  Rating color_Red  2.219755e-02
20822                               Rating color_Yellow  1.308405e-02
20818                                Rating color_Green  2.579984e-03
...                                                 ...           ...
19923                                 Cuisines_Gujarati -2.017661e-19
497                       Restaurant Name_Augsburg Haus -2.017661e-19
19825  Cuisines_Fast Food, Chinese, Mithai, Street Food -2.017661e-19
18668             Locality Verbose_Sector 31, Faridabad -2.017661e-19
8684   Address_2, Main Market, Malviya Nagar, New Delhi -3.026492e-19

[20828 rows x 2 columns]


# TASK 2

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error,r2_score

In [2]:
df = pd.read_csv("C:\\Users\\umair\\OneDrive\\Documents\\Dataset .csv")

In [3]:
df['Cuisines']=df['Cuisines'].str.lower()

In [4]:
sample_user_cuisine_preference = 'North Indian'
sample_user_price_range_preference = 2.0 
#Assuming 2.0 corresponds to moderate-priced restaurants

In [5]:
X = pd.get_dummies(df["Cuisines"], drop_first=True)
Y = df["Aggregate rating"]

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2, random_state=42)

In [7]:
model = DecisionTreeRegressor()
model.fit(X_train,Y_train)

DecisionTreeRegressor()

In [8]:
sample_user_cuisine_encoded = pd.get_dummies([sample_user_cuisine_preference.lower()], prefix=",prefix_sep=").reindex(columns=X.columns,fill_value=0)
predicted_rating = model.predict(sample_user_cuisine_encoded)
print(f"User Preference:Cuisine={sample_user_cuisine_preference}, Price Range={sample_user_price_range_preference}")
print(f"Predicted Restaurant Rating:{predicted_rating[0]}")

User Preference:Cuisine=North Indian, Price Range=2.0
Predicted Restaurant Rating:2.7636363636363632


In [9]:
Y_pred = model.predict(X_test)
mse = mean_squared_error(Y_test,Y_pred)
r2 = r2_score(Y_test,Y_pred)
print("\nModel Preference:")
print("Mean Squared Error(MSE):",mse)
print("R-squared(R2):",r2)


Model Preference:
Mean Squared Error(MSE): 1.9332763175551513
R-squared(R2): 0.1506222567609634


# TASK 3

In [10]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [11]:
np.random.seed(42)
num_samples = 1000
num_features = 5
X = np.random.randn(num_samples,num_features)
Y = np.random.randint(2,size=num_samples)

In [12]:
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=42)

In [13]:
logistic_model = LogisticRegression()

In [14]:
logistic_model.fit(X_train,Y_train)

LogisticRegression()

In [15]:
Y_pred = logistic_model.predict(X_test)

In [16]:
accuracy = accuracy_score(Y_test,Y_pred)
precision = precision_score(Y_test,Y_pred,average='weighted')
recall = recall_score(Y_test, Y_pred,average='weighted')
f1 = f1_score(Y_test, Y_pred,average='weighted')
confusion = confusion_matrix(Y_test,Y_pred)

In [17]:
print("Model Performance")
print("Accuracy:",accuracy)
print("Precision:",precision)
print("Recall:",recall)
print("F1 Score:",f1)
print("Confusion Matrix:\n",confusion)

Model Performance
Accuracy: 0.48
Precision: 0.47874188311688315
Recall: 0.48
F1 Score: 0.47817103808662437
Confusion Matrix:
 [[41 57]
 [47 55]]
