# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

# --- Load the raw data -------------------------------------------------------
# Both files are read from absolute paths under /content/drive/MyDrive.
train_data = pd.read_csv('/content/drive/MyDrive/Machine Learning/Crazylinearregression/mobile phone price prediction.csv')
test_data = pd.read_excel('/content/drive/MyDrive/Machine Learning/Crazylinearregression/test_data.xlsx')

# --- Clean the training frame ------------------------------------------------
# Drop columns that are not used as model features
# ('Unnamed: 0' is presumably a leftover index column — TODO confirm).
train_data = train_data.drop(['Unnamed: 0', 'Name','Processor'], axis=1)

# 'Price' arrives as text with thousands separators (e.g. "12,999"); remove
# the commas as a literal match and convert to float.
train_data['Price'] = train_data['Price'].str.replace(',', '', regex=False).astype(float)

# Trim surrounding whitespace in every remaining object-dtype (text) column.
train_data = train_data.apply(lambda col: col.str.strip() if col.dtype == "object" else col)

# Columns that are turned into integer codes with one LabelEncoder each.
categorical_columns = ['No_of_sim', 'Ram', 'Battery', 'Display', 'Camera',
                       'External_Memory', 'Android_version', 'company',
                       'fast_charging', 'Screen_resolution', 'Processor_name']

# Fitted encoders are kept per column so the same mapping can be re-applied
# to other data later.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    # Cast to str first so mixed-type values are encoded uniformly
    # (missing values become the literal string 'nan' and get their own code).
    train_data[column] = le.fit_transform(train_data[column].astype(str))
    label_encoders[column] = le

# Keep only the first run of digits from 'Inbuilt_memory' as a float.
# The pattern is a raw string so that `\d` is not treated as an (invalid)
# string escape; expand=False makes extract return a Series, not a DataFrame.
# NOTE(review): the unit is discarded, so a value expressed in TB would be
# read as a small number — confirm all rows use the same unit.
# NOTE(review): rows without any digits become NaN — confirm the model in use
# accepts missing values, or impute them.
train_data['Inbuilt_memory'] = (
    train_data['Inbuilt_memory'].str.extract(r'(\d+)', expand=False).astype(float)
)

# Target vector first, then every remaining column as the feature matrix.
y = train_data['Price']
X = train_data.drop(columns='Price')

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyper-parameter candidates: 3 * 4 * 3 = 36 combinations, each scored with
# 3-fold cross-validation on the training split.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}

# Fixed seed so the forests (and therefore the search result) are reproducible.
rf_model = RandomForestRegressor(random_state=42)
# n_jobs=-1 runs the candidate fits in parallel on all available cores;
# verbose=2 prints progress for each fit.
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# GridSearchCV uses refit=True by default, so best_estimator_ has already been
# refitted on the whole of X_train / y_train with the winning parameters.
# Fitting it a second time would only rebuild the same forest.
best_rf_model = grid_search.best_estimator_

# Score the selected model on the held-out validation split.
y_val_pred = best_rf_model.predict(X_val)
val_r2 = r2_score(y_val, y_val_pred)
print(f"Validation R-squared value: {val_r2}")

# Disabled test-set prediction step, kept for reference.
# It is written as comments (not a bare string literal) so that it produces no
# cell output and the regex backslash is not parsed as a string escape.
#
# NOTE(review) before re-enabling:
#   * the training frame dropped 'Unnamed: 0', 'Name' and 'Processor'; confirm
#     test_data ends up with exactly the same feature columns;
#   * LabelEncoder.transform raises on values it did not see during fit;
#   * drop(['Price']) raises KeyError if the test file has no 'Price' column.
#
# test_data = test_data.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
#
# for column in categorical_columns:
#     test_data[column] = label_encoders[column].transform(test_data[column].astype(str))
#
# test_data['Inbuilt_memory'] = test_data['Inbuilt_memory'].str.extract(r'(\d+)').astype(float)
#
# X_test = test_data.drop(['Price'], axis=1)
# y_test_pred = best_rf_model.predict(X_test)
#
# print("Predicted Prices for Test Data:")
# print(y_test_pred)