In [13]:
# ─── 1) Data Reading and Libraries ─────────────────────────────────────────────
# Every import this cell needs is declared once, here, so the cell's dependencies
# are visible at a glance and nothing is re-imported further down.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

df = pd.read_csv('../data/processed/merged_analysis.csv')

# Convert the two date columns into integer ordinals so they can serve as
# numeric features; the original text columns are removed afterwards.
for col in ['Week', 'Date']:
    parsed = pd.to_datetime(df[col], errors='coerce')  # unparseable values become NaT
    # NOTE(review): NaT entries are replaced by the column's median date, which
    # pairs a real target value with an invented date — confirm this is preferred
    # over dropping those rows.
    med = parsed.median()
    df[f"{col}_ord"] = parsed.fillna(med).map(lambda d: d.toordinal())
    # Rebind instead of inplace=True so the frame is never mutated behind a reference.
    df = df.drop(columns=col)

# Target: keep only the rows where USD_TRY is present, and align the features
# to exactly those rows.
y = df['USD_TRY'].dropna()
X = df.loc[y.index, ['Week_ord', 'Date_ord']]

# ─── 2) Split Train Test ───────────────────────────────────────────────────
# NOTE(review): train_test_split shuffles rows by default, so test dates are
# interleaved with training dates. With date ordinals as the only features the
# scores below measure interpolation between known dates rather than
# forecasting — confirm that this is the intended evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ─── 3) Scaler ────────────────────────────────────────────────────────
# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# ─── 4) Models ──────────────────────────────────────────
models = {
    'KNN (k=5)': KNeighborsRegressor(n_neighbors=5),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
}

# Fit every model on the scaled training data and keep its test-set predictions.
preds = {}
for name, mdl in models.items():
    mdl.fit(X_train_s, y_train)
    preds[name] = mdl.predict(X_test_s)

# ─── 5) Metrics ───────────────────────────────────────────────
# One row per model: R², MSE, RMSE (square root of MSE) and MAE on the test set.
results = []
for name, y_pred in preds.items():
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    results.append((name, r2, mse, rmse, mae))

metrics_df = pd.DataFrame(
    results,
    columns=['Model', 'R²', 'MSE', 'RMSE', 'MAE']
).set_index('Model')

print(metrics_df)

# ─── 6) Comparison ────────────────────────────────
# Show up to 10 reproducibly chosen test rows next to each model's prediction.
# The sample size is clamped because choice(..., replace=False) raises
# ValueError when asked for more rows than the test set contains.
n_show = min(10, len(y_test))
sample_idx = np.random.RandomState(42).choice(len(y_test), n_show, replace=False)
comp = pd.DataFrame({
    'Real': y_test.values[sample_idx],
    '(KNN)': preds['KNN (k=5)'][sample_idx],
    '(DT)': preds['Decision Tree'][sample_idx],
    '(RF)': preds['Random Forest'][sample_idx],
}, index=y_test.index[sample_idx])

# The title and the frame are printed separately: passing the frame as a second
# print() argument inserts the separator space before its first line, which
# shifts the header row one column to the right of the data rows.
print("\n10 Random Sample Comparison For Usd to Try:")
print(comp)


                     R²       MSE      RMSE       MAE
Model                                                
KNN (k=5)      0.995913  0.358260  0.598549  0.294337
Decision Tree  0.998428  0.137820  0.371241  0.193607
Random Forest  0.998912  0.095377  0.308831  0.155853

10 Random Sample Comparison For Usd to Try:
           Real      (KNN)       (DT)       (RF)
116   7.943958   7.588830   7.477888   7.686385
84    7.300596   7.201551   7.025704   7.101137
132   8.610068   8.597508   8.558652   8.578679
180  17.257456  17.057594  17.336640  16.964853
204  18.623406  18.685550  18.666778  18.631864
231  22.215686  21.318738  20.480896  21.055235
42    5.791602   5.772140   5.888960   5.855109
238  26.939160  26.544405  26.650200  26.654791
119   8.108340   8.082271   8.143140   8.161888
146   9.325512   9.279800   9.064978   9.229617


In [12]:
# ─── 1) Data Reading ─────────────────────────────────────────────
# Every import this cell needs is declared once, here, so the cell's dependencies
# are visible at a glance and nothing is re-imported further down.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

df = pd.read_csv('../data/processed/merged_analysis.csv')

# Convert the two date columns into integer ordinals so they can serve as
# numeric features; the original text columns are removed afterwards.
for col in ['Week', 'Date']:
    parsed = pd.to_datetime(df[col], errors='coerce')  # unparseable values become NaT
    # NOTE(review): NaT entries are replaced by the column's median date, which
    # pairs a real target value with an invented date — confirm this is preferred
    # over dropping those rows.
    med = parsed.median()
    df[f"{col}_ord"] = parsed.fillna(med).map(lambda d: d.toordinal())
    # Rebind instead of inplace=True so the frame is never mutated behind a reference.
    df = df.drop(columns=col)

# Target: the 'FİYAT' column (reported as gold prices in the comparison below).
# Keep only the rows where it is present and align the features to those rows.
y = df['FİYAT'].dropna()
X = df.loc[y.index, ['Week_ord', 'Date_ord']]

# ─── 2) Train / Test  ───────────────────────────────────────────────────
# NOTE(review): train_test_split shuffles rows by default, so test dates are
# interleaved with training dates. With date ordinals as the only features the
# scores below measure interpolation between known dates rather than
# forecasting — confirm that this is the intended evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ─── 3) Scaler ────────────────────────────────────────────────────────
# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# ─── 4) Models ──────────────────────────────────────────
models = {
    'KNN (k=5)': KNeighborsRegressor(n_neighbors=5),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
}

# Fit every model on the scaled training data and keep its test-set predictions.
preds = {}
for name, mdl in models.items():
    mdl.fit(X_train_s, y_train)
    preds[name] = mdl.predict(X_test_s)

# ─── 5) Metrics ───────────────────────────────────────────────
# One row per model: R², MSE, RMSE (square root of MSE) and MAE on the test set.
results = []
for name, y_pred in preds.items():
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    results.append((name, r2, mse, rmse, mae))

metrics_df = pd.DataFrame(
    results,
    columns=['Model', 'R²', 'MSE', 'RMSE', 'MAE']
).set_index('Model')

print(metrics_df)

# ─── 6) Comparison ────────────────────────────────
# Show up to 10 reproducibly chosen test rows next to each model's prediction.
# The sample size is clamped because choice(..., replace=False) raises
# ValueError when asked for more rows than the test set contains.
n_show = min(10, len(y_test))
sample_idx = np.random.RandomState(42).choice(len(y_test), n_show, replace=False)
comp = pd.DataFrame({
    'Real': y_test.values[sample_idx],
    ' (KNN)': preds['KNN (k=5)'][sample_idx],
    ' (DT)': preds['Decision Tree'][sample_idx],
    '(RF)': preds['Random Forest'][sample_idx],
}, index=y_test.index[sample_idx])

# The title and the frame are printed separately: passing the frame as a second
# print() argument inserts the separator space before its first line, which
# shifts the header row one column to the right of the data rows.
print("\n10 Random Sample Comparison For Gold Prices:")
print(comp)


                     R²          MSE       RMSE        MAE
Model                                                     
KNN (k=5)      0.987554  5431.945234  73.701732  49.926267
Decision Tree  0.977240  9933.514247  99.667017  55.076667
Random Forest  0.992012  3486.366967  59.045465  31.179620

10 Random Sample Comparison For Gold Prices:
         Real     (KNN)     (DT)       (RF)
52    297.91   337.086   315.32   303.6371
39    280.10   279.826   277.11   276.2929
95    478.84   467.988   462.72   473.7415
21    259.50   255.064   259.37   258.1561
78    413.81   416.360   382.84   426.1252
204  1091.54  1079.198  1047.70  1077.7337
217  1201.91  1321.064  1153.58  1188.5121
0     225.32   244.630   224.94   229.2215
234  1689.14  1557.950  1495.28  1567.2401
243  1710.53  1772.132  1795.05  1768.0335
