In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

# Load the training table and the separate test sheet from Google Drive.
train_data = pd.read_csv('/content/drive/MyDrive/Machine Learning/Crazylinearregression/mobile phone price prediction.csv')
test_data = pd.read_excel('/content/drive/MyDrive/Machine Learning/Crazylinearregression/test_data.xlsx')

# These three columns are not used as model inputs.
train_data = train_data.drop(['Unnamed: 0', 'Name', 'Processor'], axis=1)
# Price is read as text; remove the thousands separators before converting.
train_data['Price'] = train_data['Price'].str.replace(',', '').astype(float)
# Trim surrounding whitespace in every text column so equal labels compare equal.
train_data = train_data.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

categorical_columns = ['No_of_sim', 'Ram', 'Battery', 'Display', 'Camera',
                       'External_Memory', 'Android_version', 'company',
                       'fast_charging', 'Screen_resolution', 'Processor_name']

# One encoder per column, kept in `label_encoders` so the same mapping can be
# applied to other data later. Values are cast to str first so missing entries
# become the ordinary label 'nan'.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    train_data[column] = le.fit_transform(train_data[column].astype(str))
    label_encoders[column] = le

# Keep only the leading number of the storage description. The pattern is a
# raw string: '\d' inside a normal string literal is an invalid escape sequence.
# NOTE(review): the unit is discarded here; if the column mixes units (e.g. GB
# and TB) the numbers are not comparable - confirm against the data.
train_data['Inbuilt_memory'] = (
    train_data['Inbuilt_memory'].str.extract(r'(\d+)', expand=False).astype(float)
)

X = train_data.drop('Price', axis=1)
y = train_data['Price']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyper-parameter grid: 3 * 4 * 3 = 36 candidates, each scored with 3-fold CV.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}

rf_model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# With the default refit=True, GridSearchCV has already refit the winning
# configuration on all of X_train, so best_estimator_ is ready to use and a
# second .fit() would only repeat that work.
best_rf_model = grid_search.best_estimator_

# Score the selected model on the held-out validation split.
y_val_pred = best_rf_model.predict(X_val)
val_r2 = r2_score(y_val, y_val_pred)
print(f"Validation R-squared value: {val_r2}")

# Disabled: prediction on the separate test sheet.
# Kept as comments rather than a bare string literal, which the notebook would
# echo as the cell's output.
# NOTE(review): before re-enabling, confirm that (a) every label in test_data
# was seen during fitting - LabelEncoder.transform raises on unseen labels -
# and (b) test_data has exactly the feature columns the model was trained on
# (the training frame drops 'Unnamed: 0', 'Name' and 'Processor').
#
# test_data = test_data.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
#
# for column in categorical_columns:
#     test_data[column] = label_encoders[column].transform(test_data[column].astype(str))
#
# test_data['Inbuilt_memory'] = test_data['Inbuilt_memory'].str.extract(r'(\d+)').astype(float)
#
# X_test = test_data.drop(['Price'], axis=1)
# y_test_pred = best_rf_model.predict(X_test)
#
# print("Predicted Prices for Test Data:")
# print(y_test_pred)





Fitting 3 folds for each of 36 candidates, totalling 108 fits
Validation R-squared value: 0.8757769585592425


'test_data = test_data.apply(lambda x: x.str.strip() if x.dtype == "object" else x)\n\nfor column in categorical_columns:\n    test_data[column] = label_encoders[column].transform(test_data[column].astype(str))\n\ntest_data[\'Inbuilt_memory\'] = test_data[\'Inbuilt_memory\'].str.extract(\'(\\d+)\').astype(float)\n\nX_test = test_data.drop([\'Price\'], axis=1)\ny_test_pred = best_rf_model.predict(X_test)\n\nprint("Predicted Prices for Test Data:")\nprint(y_test_pred)'

In [None]:
import tensorflow_hub as hub


In [None]:
pip install tensorflow-hub tensorflow-text

In [None]:
# TF-Hub handles: the uncased English BERT encoder and its matching
# text-preprocessing model.
encoder_url, preprocess_url = (
    'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H768_A-12/4',
    'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
)

In [None]:
# Read the phone-price CSV from Google Drive; `df` is the frame that the
# following cells encode and turn into training arrays.
data = pd.read_csv(
    '/content/drive/MyDrive/Machine Learning/Crazylinearregression/mobile phone price prediction.csv'
)
df = pd.DataFrame(data=data)


# Label Encoding

In [None]:
# Display is cast to str so the .str accessor works on every row, then the
# unit text is blanked out before the column is encoded.
df['Display'] = df['Display'].astype(str)
print(df['Display'])
df["Display"] = df["Display"].str.replace('inches', ' ')

# Every column that is turned into integer codes.
encoded_columns = [
    "No_of_sim", "Ram", "Battery", "Android_version", "Display",
    "Camera", "External_Memory", "company", "Inbuilt_memory",
    "Processor", "fast_charging", "Screen_resolution", "Processor_name",
]

# A fresh encoder per column, kept in a dict: re-fitting one shared encoder
# would throw away the mapping of every column but the last, leaving no way to
# apply the same codes to other data or to decode them again.
fitted_label_encoders = {}
for column in encoded_columns:
    label_encoder = preprocessing.LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    fitted_label_encoders[column] = label_encoder
# `label_encoder` stays bound (to the last column's encoder) for later cells.

print(df["Display"])


0       6.6 inches
1       6.4 inches
2       6.6 inches
3       6.4 inches
4       6.5 inches
           ...    
1365    6.6 inches
1366    6.8 inches
1367    6.6 inches
1368    6.6 inches
1369     10 inches
Name: Display, Length: 1370, dtype: object
0       43
1       28
2       43
3       28
4       34
        ..
1365    43
1366    60
1367    43
1368    43
1369     0
Name: Display, Length: 1370, dtype: int64


In [None]:
# Move Price to the last column so features and target can be sliced by position.
price = df.pop("Price")
# Prices are text with thousands separators. Unparseable or missing entries
# become NaN under errors='coerce'; they are filled with 0 before the integer
# cast because astype(int) raises on NaN.
# NOTE(review): a 0 price for a missing target may be better handled by
# dropping the row - confirm the intended policy.
price = pd.to_numeric(price.astype(str).str.replace(',', ''), errors='coerce')
df["Price"] = price.fillna(0).astype(int)
# fillna returns a new frame, so the result must be assigned to take effect.
df = df.fillna(0)

In [None]:
# Inputs are taken by position: columns 2..16 of `df`.
feature_frame = df.iloc[:, 2:17]
x_train = np.array(feature_frame, dtype='float64')
'''for i in range(len(x_train)):
  for j in range(len(x_train[0])):
    print(x_train[i][j])'''
# Column 17 is the target; the printed values below are prices.
target_column = df.iloc[:, 17]
y_train = np.array(target_column, dtype='float64')
print(y_train)



[  9999.   9990.  11999. ...  23990.  22499. 119990.]


In [None]:
def z_score(x_train, axis=0):
  """Standardise an array to zero mean and unit standard deviation.

  Statistics are computed along `axis`. With the default axis=0 a 2-D
  (samples, features) matrix is standardised feature by feature, and a 1-D
  array is standardised as a whole. Pass axis=None to use a single mean and
  standard deviation for the entire array.

  Constant slices (standard deviation 0) are mapped to 0 instead of producing
  a division by zero.

  Args:
    x_train: array-like of numbers.
    axis: axis along which mean and standard deviation are taken.

  Returns:
    A float64 ndarray with the same shape as `x_train`.
  """
  x_train = np.asarray(x_train, dtype='float64')
  # keepdims keeps the statistics broadcastable against x_train for any axis.
  x_mean = np.mean(x_train, axis=axis, keepdims=True)
  x_std = np.std(x_train, axis=axis, keepdims=True)
  # Avoid 0/0 for constant slices: dividing by 1 leaves them at exactly 0.
  x_std = np.where(x_std == 0, 1.0, x_std)
  return (x_train - x_mean) / x_std


In [None]:
def cost(x_train,y_train,w,b):
  """Mean squared error of the linear model x_train @ w + b.

  Args:
    x_train: feature matrix; its length is the number of samples.
    y_train: targets.
    w: weights.
    b: bias.

  Returns:
    Sum of squared residuals divided by len(x_train).
  """
  predictions=np.dot(x_train,w)+b
  targets=np.asarray(y_train)
  # A column of predictions (m, 1) minus a flat target (m,) would broadcast to
  # an (m, m) matrix and inflate the loss; line the target up as a column.
  if predictions.ndim==2 and predictions.shape[1]==1 and targets.ndim==1:
    targets=targets.reshape(-1,1)
  residuals=predictions-targets
  loss=np.sum(residuals**2)/len(x_train)
  return loss

In [None]:
def gradient_descent(x_train,y_train,w,b,alpha=0.001,num_iters=1000):
  """Run batch gradient descent for the linear model x_train @ w + b.

  Args:
    x_train: ndarray feature matrix of shape (m, n).
    y_train: targets, either flat (m,) or shaped like the predictions.
    w: initial weights, (n,) or (n, 1).
    b: initial bias.
    alpha: learning rate.
    num_iters: number of update steps.

  Returns:
    (w, b) after num_iters steps; w keeps the shape it was passed in with.
  """
  m=len(x_train)
  targets=np.asarray(y_train)
  # With column weights (n, 1) the predictions are (m, 1); subtracting a flat
  # (m,) target would broadcast the error to (m, m) and grow w to (n, m).
  if np.ndim(w)==2 and np.shape(w)[1]==1 and targets.ndim==1:
    targets=targets.reshape(-1,1)
  for _ in range(num_iters):
    f_wb=np.dot(x_train,w)+b
    error=f_wb-targets
    grad_w=np.dot(x_train.T,error)/m
    grad_b=np.sum(error)/m
    w=w-grad_w*alpha
    b=b-grad_b*alpha

  return w,b


In [None]:

w=np.zeros((x_train.shape[1],1))
b=np.zeros(1)
x_train=z_score(x_train)
print(len(x_train))
# w is a column vector, so predictions have shape (m, 1). The target is made a
# column as well; a flat (m,) target would broadcast the residuals to (m, m).
y_train=z_score(y_train).reshape(-1,1)

TOLERANCE=0.001    # stop once a round changes the cost by no more than this
MAX_ROUNDS=10000   # hard stop so a diverging run cannot loop forever

# Start from the real initial cost so the first comparison is meaningful.
old_cost=cost(x_train,y_train,w,b)
for _ in range(MAX_ROUNDS):
  w,b=gradient_descent(x_train,y_train,w,b)
  new_cost=cost(x_train,y_train,w,b)
  change=new_cost-old_cost
  if change<0:
    print(f"Good going:  {abs(change)}")
  else:
    print('Mistake')
  old_cost=new_cost
  if abs(change)<=TOLERANCE:
    break





1370
Mistake
Good going:  1311.6257658609106
Good going:  37.46439800879475
Good going:  11.85197261273883
Good going:  3.917119445452882
Good going:  1.400089784481077
Good going:  0.5803794620955403
Good going:  0.3009358019862569
Good going:  0.19714703785783394
Good going:  0.15235941964496735
Good going:  0.12849802819642075
Good going:  0.1127916546937584
Good going:  0.10078424781550899
Good going:  0.09082302383727026
Good going:  0.08223172081057806
Good going:  0.07468956323074072
Good going:  0.06801369385750311
Good going:  0.0620801771793158
Good going:  0.056793998832055825
Good going:  0.05207694872508406
Good going:  0.04786226640980784
Good going:  0.04409195317604819
Good going:  0.04071519635973342
Good going:  0.03768729680643945
Good going:  0.034968851949969126
Good going:  0.03252508884972327
Good going:  0.03032529870601275
Good going:  0.02834234812761216
Good going:  0.026552252719427738
Good going:  0.02493380331477235
Good going:  0.023468237598502917
Good g

In [None]:
data2=pd.read_excel('/content/drive/MyDrive/Machine Learning/Crazylinearregression/test_data.xlsx')
df2=pd.DataFrame(data2)

# Normalise Price to integers: cast to text, drop thousands separators, parse.
# Unparseable or missing entries become NaN under errors='coerce' and are
# filled with 0 before the cast, because astype(int) raises on NaN.
# Only Price is filled here; the other columns are left as loaded.
price_numeric = pd.to_numeric(
    df2['Price'].astype(str).str.replace(',', ''), errors='coerce'
)
df2["Price"] = price_numeric.fillna(0).astype(int)
x_test=np.array(df2["Price"],dtype='float64')

In [None]:
# Display: cast to str and blank out the unit text before it is encoded.
df2['Display'] = df2['Display'].astype(str)
df2["Display"] = df2["Display"].str.replace('inches', ' ')

# Replace each listed column of df2 with integer codes.
# NOTE(review): the encoder is re-fitted on df2 for every column, so the codes
# are derived from df2's own values and need not match the codes produced for
# the training frame - confirm whether the training mappings should be reused.
for column_name in (
    "No_of_sim", "Ram", "Battery", "Android_version", "Display",
    "Camera", "External_Memory", "company", "Inbuilt_memory",
    "Processor", "fast_charging", "Screen_resolution", "Processor_name",
):
    df2[column_name] = label_encoder.fit_transform(df2[column_name])

In [None]:
# Move Price to the last column of df2.
price = df2.pop("Price")
# Cast to text, drop thousands separators and parse. Entries that cannot be
# parsed become NaN and are filled with 0 before the cast, because astype(int)
# raises on NaN.
price = pd.to_numeric(price.astype(str).str.replace(',', ''), errors='coerce')
df2["Price"] = price.fillna(0).astype(int)
# fillna returns a new frame, so the result must be assigned to take effect.
df2 = df2.fillna(0)

In [None]:
# NOTE(review): the arrays below are sliced from `df`, the same frame the
# linear model was trained on, not from `df2`. This is kept as written because
# the column layout of `df2` is not visible here - confirm before switching.
x_test=np.array(df.iloc[:,2:17],dtype='float64')
y_test=np.array(df.iloc[:,17],dtype='float64')

# w and b were fitted on z-scored inputs and a z-scored target, so the inputs
# go through the same z_score transform and the predictions are mapped back to
# price units with the target's mean and standard deviation.
x_test_scaled=z_score(x_test)
y_mean=np.mean(y_test)
y_std=np.std(y_test)

predict=(np.dot(x_test_scaled,w)+b)*y_std+y_mean
if predict.ndim==2 and predict.shape[1]==1:
  predict=predict.ravel()

# One absolute error per row: row i is compared with y_test[i] only.
# (Slicing x_test[i:] would score every remaining row against y_test[i].)
error=np.abs(predict-y_test.reshape((-1,)+(1,)*(predict.ndim-1)))
print(f"Mean absolute error: {error.mean()}")
print(error[:10])

# Random-forest validation score, shown alongside for comparison.
y_val_pred = best_rf_model.predict(X_val)
val_r2 = r2_score(y_val, y_val_pred)
print(f"Validation R-squared value: {val_r2}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 [15983.19134903 15983.18873454 15983.77234794 ... 15987.25572692
  15986.82259223 16015.14367467]
 [15985.20562477 15985.20378374 15985.61474058 ... 15988.06759445
  15987.76259861 16007.70515359]]
[[16984.50943236 16984.507324   16984.97795598 ... 16987.78698937
  16987.43770501 17010.2761233 ]
 [16983.16353187 16983.1609067  16983.74690449 ... 16987.24451502
  16986.80961074 17015.24640068]
 [16983.26052981 16983.25794188 16983.83562535 ... 16987.2836107
  16986.85487697 17014.8881969 ]
 ...
 [16983.32772409 16983.32516196 16983.89708578 ... 16987.31069381
  16986.88623467 17014.64005508]
 [16983.19134903 16983.18873454 16983.77234794 ... 16987.25572692
  16986.82259223 17015.14367467]
 [16985.20562477 16985.20378374 16985.61474058 ... 16988.06759445
  16987.76259861 17007.70515359]]
[[16983.16353187 16983.1609067  16983.74690449 ... 16987.24451502
  16986.80961074 17015.24640068]
 [16983.26052981 16983.25794188 16983.