In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [None]:
# Load the pre-cleaned dataset (path is relative to the notebook's working directory).
data = pd.read_csv(filepath_or_buffer="clean-data.csv")

In [None]:
# Display the first rows of the freshly loaded frame (head() defaults to 5 rows).
data.head()

In [None]:
# Display summary statistics of the loaded frame.
data.describe()

In [None]:
# Work on an independent deep copy so the originally loaded `data` stays untouched as a backup.
features = data.copy(deep=True)

In [None]:
# Print the column names, non-null counts and dtypes of the working copy.
features.info()

In [None]:
# Display the first rows of the working copy (head() defaults to 5 rows).
features.head()

In [None]:
# Separate the dependent variable (the "price" target) from the independent variables
# (every remaining column).
y = features["price"]
X = features.drop(columns=["price"])

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Column groups used for preprocessing.
# "price" is the target that the models below predict (it is what `y` holds), so it must
# NOT be listed among the numeric predictors: feeding it to a model as an input would
# leak the answer into the features.
num_features = features[["carat", "depth", "table", "x", "y", "z"]]
cat_features = features[["cut", "color", "clarity"]]

In [None]:
from sklearn.preprocessing import OneHotEncoder

# Rebuild the raw train/test frames from X using the row labels that y_train / y_test
# still carry. This keeps the cell safe to re-run even after X_train / X_test have been
# replaced by transformed arrays below. (Relies on X having a unique index, which is the
# case for the default index produced when the CSV is read.)
X_train_raw = X.loc[y_train.index]
X_test_raw = X.loc[y_test.index]

# Split the predictor columns by dtype. The target was already dropped from X, so it
# cannot end up in either group.
numeric_cols = X.select_dtypes(include="number").columns.tolist()
categorical_cols = X.select_dtypes(exclude="number").columns.tolist()

#pipeline numerical: fill missing values with the median, then standardise
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="median")),
    ("std_scaler", StandardScaler())
])

transformers = [("num", num_pipeline, numeric_cols)]
if categorical_cols:
    # Non-numeric columns cannot be scaled; one-hot encode them instead. Categories that
    # appear only in the test split are encoded as all zeros rather than raising.
    transformers.append(
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
    )

# sparse_threshold=0 forces a dense array, like a plain StandardScaler would return.
preprocessor = ColumnTransformer(transformers, sparse_threshold=0)

# Learn the imputation/scaling/encoding statistics from the training split ONLY and
# re-use them on the test split. Re-fitting on the test data would scale the two splits
# differently and let test-set information influence the evaluation.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

**Linear Regression**

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import mean_squared_error

# Run the prediction on the held-out split.
lr_pred = lr.predict(X_test)

# Mean squared error. scikit-learn metrics take (y_true, y_pred) in that order; the value
# is the same either way for MSE, but the documented order keeps the call correct if the
# metric is ever swapped for a non-symmetric one.
lr_mse = mean_squared_error(y_test, lr_pred)

# Root mean squared error.
lr_rmse = np.sqrt(lr_mse)
print("The root mean squared error in Linear Regression is", lr_rmse)

**Support Vector Machines Regressor**

In [None]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Candidate values shared by the sub-grids below (5 log-spaced values from 1e-3 to 1e3).
c_values = np.logspace(-3, 3, 5)
gamma_values = np.logspace(-3, 3, 5)

# `gamma` is the kernel coefficient of the rbf/poly/sigmoid kernels only. Crossing it
# with the linear kernel would just refit the same linear model once per gamma value, so
# the linear kernel gets its own sub-grid without gamma.
parameters = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf', 'poly', 'sigmoid'], 'C': c_values, 'gamma': gamma_values},
]

regsvr = svm.SVR()

# 10-fold cross-validated exhaustive search over the grid. No `scoring` is passed, so the
# estimator's own score method is used. n_jobs=-1 spreads the independent fits over all
# available cores; it does not change the results.
cv_regsvr = GridSearchCV(regsvr, parameters, cv=10, n_jobs=-1)

cv_regsvr.fit(X_train, y_train)


In [None]:
print("tuned hyperparameters: (best parameters) ", cv_regsvr.best_params_)
# best_score_ is the mean cross-validated score of the best candidate. The search was
# built without a `scoring` argument, so this is the regressor's default score (R^2),
# not a classification accuracy.
print("best cross-validated R^2 : ", cv_regsvr.best_score_)

In [None]:
# Predict on the held-out split with the best estimator found by the grid search.
pred_svr = cv_regsvr.predict(X_test)

# Calculate the RMSE. scikit-learn metrics take (y_true, y_pred) in that order; the value
# is the same either way for MSE, but the documented order keeps the call correct if the
# metric is ever swapped for a non-symmetric one.
mse_svr = mean_squared_error(y_test, pred_svr)
rmse_svr = np.sqrt(mse_svr)

print("The Root mean Squared error in Support Vector Machines is: ", rmse_svr)

**Random Forest Regressor**

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Forest sizes to try: 100, 150, ..., 450.
trees = np.arange(100, 500, 50)

# The candidate sizes belong in the search grid, not in the estimator's constructor:
# n_estimators must be a single integer, and GridSearchCV requires a parameter grid.
parameters = {'n_estimators': trees.tolist()}

# Fixed seed so the forests (and therefore the search results) are reproducible.
rfr = RandomForestRegressor(random_state=0)

# 10-fold cross-validated search over the forest sizes. n_jobs=-1 spreads the independent
# fits over all available cores; it does not change the results.
rfr_cv = GridSearchCV(rfr, parameters, cv=10, n_jobs=-1)

rfr_cv.fit(X_train, y_train)