In [11]:
import pandas as pd

In [12]:
# Location of the preprocessed watch dataset (absolute path, as used by the original cell).
WATCHES_CSV = '/content/drive/MyDrive/khdl/luxury_watches_processed_update.csv'

# Read the CSV into the working frame used by every later cell.
df = pd.read_csv(WATCHES_CSV)

# Leave the preview as the last expression so the notebook renders it.
df.head()

Unnamed: 0,Brand,Model,Brand_encoded,Model_encoded,Case Material,Strap Material,Movement Type,Water Resistance,Case Diameter,Case Thickness,Band Width,Power Reserve,Dial Color,Crystal Material,Complication_Score,Price
0,Rolex,Submariner,10964.814815,9500.0,Stainless Steel,Steel,Automatic,300.0,40.0,13.0,20.0,48.0,Black,Sapphire,1,9500.0
1,Omega,Seamaster,5321.73913,5800.0,Titanium,Rubber,Automatic,500.0,43.5,14.47,21.0,60.0,Blue,Sapphire,1,5800.0
2,Tag Heuer,Carrera,3613.636364,4200.0,Stainless Steel,Leather,Automatic,100.0,41.0,13.0,20.0,42.0,White,Sapphire,3,4200.0
3,Breitling,Navitimer,4693.75,7900.0,Stainless Steel,Steel,Automatic,30.0,43.0,14.25,22.0,70.0,Black,Sapphire,3,7900.0
4,Cartier,Tank Solo,6715.789474,2800.0,Stainless Steel,Leather,Quartz,30.0,35.0,6.05,20.0,55.0,Silver,Sapphire,0,2800.0


In [13]:
# Print the frequency of every level for each text-valued column, one table
# after another, so rare levels are easy to spot.
for column_name in (
    'Case Material',
    'Strap Material',
    'Movement Type',
    'Dial Color',
    'Crystal Material',
):
    print(df[column_name].value_counts())

Case Material
Stainless Steel    405
Titanium            37
Rose Gold           26
White Gold          21
Ceramic             10
Yellow Gold          4
Carbon Fiber         3
Bronze               1
Name: count, dtype: int64
Strap Material
Leather      224
Steel        166
Rubber        74
Fabric        11
Canvas        10
Nato           5
Jubilee        5
Silicone       5
Titanium       2
Alligator      2
Rose Gold      2
Textile        1
Name: count, dtype: int64
Movement Type
Automatic    442
Manual        51
Quartz        12
Eco-Drive      2
Name: count, dtype: int64
Dial Color
Black        217
Blue         136
Silver       118
White         33
Ivory          1
Champagne      1
Grey           1
Name: count, dtype: int64
Crystal Material
Sapphire    489
Hesalite     13
Hardlex       3
Mineral       2
Name: count, dtype: int64


In [14]:
# Target vector: the Price column.
y = df['Price']

# Candidate predictors: every column except the Brand/Model strings and the target.
X = df.drop(columns=['Brand', 'Model', 'Price'])

In [15]:
# Preview the first five rows of the predictor frame.
X.head(n=5)

Unnamed: 0,Brand_encoded,Model_encoded,Case Material,Strap Material,Movement Type,Water Resistance,Case Diameter,Case Thickness,Band Width,Power Reserve,Dial Color,Crystal Material,Complication_Score
0,10964.814815,9500.0,Stainless Steel,Steel,Automatic,300.0,40.0,13.0,20.0,48.0,Black,Sapphire,1
1,5321.73913,5800.0,Titanium,Rubber,Automatic,500.0,43.5,14.47,21.0,60.0,Blue,Sapphire,1
2,3613.636364,4200.0,Stainless Steel,Leather,Automatic,100.0,41.0,13.0,20.0,42.0,White,Sapphire,3
3,4693.75,7900.0,Stainless Steel,Steel,Automatic,30.0,43.0,14.25,22.0,70.0,Black,Sapphire,3
4,6715.789474,2800.0,Stainless Steel,Leather,Quartz,30.0,35.0,6.05,20.0,55.0,Silver,Sapphire,0


In [16]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [17]:
# Text-valued columns that must be turned into indicator columns before fitting.
cat_cols = [
    'Case Material',
    'Strap Material',
    'Movement Type',
    'Dial Color',
    'Crystal Material',
]

# One-hot encode those columns. drop_first=True omits the first level of each
# column, leaving it as the implicit baseline.
df_encoded = pd.get_dummies(data=df, columns=cat_cols, drop_first=True)

In [18]:
# Numeric predictors taken directly from the encoded frame.
# NOTE(review): in the previewed rows Model_encoded is identical to Price, and
# Brand_encoded looks like a per-brand price statistic. If either was derived
# from Price over the full dataset, it leaks the target into the test split and
# inflates the reported scores — confirm how these columns were built.
numeric_features = [
    'Case Diameter',
    'Case Thickness',
    'Band Width',
    'Power Reserve',
    'Complication_Score',
    'Water Resistance',
    'Brand_encoded',
    'Model_encoded',
]

# get_dummies names its indicator columns "<source column>_<level>". Match on
# that exact prefix rather than a substring, so an unrelated column whose name
# merely contains a categorical column name is never pulled in by accident.
dummy_prefixes = tuple(f"{cat}_" for cat in cat_cols)
dummy_features = [col for col in df_encoded.columns if col.startswith(dummy_prefixes)]

# Numeric columns first, then the indicator columns in frame order.
features = numeric_features + dummy_features

# Final design matrix and target used for model fitting.
X = df_encoded[features]
y = df_encoded['Price']

In [19]:
import numpy as np

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a plain linear regression on the training rows (fit returns the estimator itself),
# then predict the held-out rows.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Test-split metrics; RMSE is the square root of MSE.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² score: {r2:.4f}")

MSE: 6444042.17
RMSE: 2538.51
R² score: 0.8939


In [20]:
# Pair each design-matrix column with its fitted coefficient.
coeff_df = pd.DataFrame({'Feature': X.columns, 'Coefficient': lr.coef_})

# Order by absolute coefficient size, largest first (sign is kept in the value).
coeff_df = coeff_df.sort_values(by='Coefficient', key=abs, ascending=False)

# NOTE(review): the features are not scaled, and the value counts printed earlier
# show some levels with only one or two rows (e.g. Strap Material "Rose Gold": 2,
# Dial Color "Ivory": 1), so the largest magnitudes here may reflect a handful of
# rows rather than a stable effect — verify before interpreting.
print(coeff_df.head(10))

                      Feature   Coefficient
20   Strap Material_Rose Gold  17969.624821
32           Dial Color_Ivory   6203.729392
14  Case Material_Yellow Gold   6031.916346
31            Dial Color_Grey  -5915.404733
30       Dial Color_Champagne  -3796.270608
25    Strap Material_Titanium  -3080.413757
19        Strap Material_Nato  -2916.331744
15      Strap Material_Canvas  -2673.307820
16      Strap Material_Fabric  -2489.347861
21      Strap Material_Rubber  -2036.691091
