In [8]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.impute import SimpleImputer



import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


In [9]:
train = pd.read_csv("train_split_merged_expanded_data.csv", parse_dates=["date"])
val   = pd.read_csv("val_split_merged_expanded_data.csv", parse_dates=["date"])
test  = pd.read_csv("test_split_merged_expanded_data_filtered.csv", parse_dates=["date"])

print("Train:", train.shape, "Val:", val.shape, "Test:", test.shape)
print("Train dates:", train["date"].min(), "→", train["date"].max())
print("Val dates:", val["date"].min(), "→", val["date"].max())
print("Test dates:", test["date"].min(), "→", test["date"].max())


Train: (7475, 11) Val: (1840, 11) Test: (1830, 11)
Train dates: 2013-07-01 00:00:00 → 2017-07-31 00:00:00
Val dates: 2017-08-01 00:00:00 → 2018-07-31 00:00:00
Test dates: 2018-08-01 00:00:00 → 2019-07-30 00:00:00


In [10]:
def add_features(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"])

    # -------------------------
    # Calendar features
    # -------------------------
    df["Wochentag"] = df["date"].dt.day_name()
    df["Month"]     = df["date"].dt.month
    df["dayofyear"] = df["date"].dt.dayofyear

    # Seasonality encoding (cyclical)
    df["sin_season"] = np.sin(2 * np.pi * df["dayofyear"] / 365)
    df["cos_season"] = np.cos(2 * np.pi * df["dayofyear"] / 365)

    df["is_weekend"] = df["Wochentag"].isin(["Saturday", "Sunday"]).astype(int)

    # Integer/calendar flags (force 0/1 and no NaNs)
    for col in ["KielerWoche", "school_holiday", "public_holiday"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int)

    return df

train_fe = add_features(train)
val_fe   = add_features(val)
test_fe  = add_features(test)

print("Feature engineering done.")


Feature engineering done.


In [11]:
train_fe["dataset"] = "train"
val_fe["dataset"]   = "val"
test_fe["dataset"]  = "test"

df_all = pd.concat([train_fe, val_fe, test_fe], ignore_index=True)
df_all = df_all.sort_values(["warengruppe", "date"])

# Lags of target (umsatz) capture short-term and weekly memory
for lag in [1, 2, 7, 14]:
    df_all[f"lag_{lag}"] = df_all.groupby("warengruppe")["umsatz"].shift(lag)

# Rolling mean & std (shift(1) avoids leakage)
for window in [7, 14, 30]:
    df_all[f"roll{window}_mean"] = (
        df_all.groupby("warengruppe")["umsatz"].shift(1).rolling(window).mean()
    )
    df_all[f"roll{window}_std"] = (
        df_all.groupby("warengruppe")["umsatz"].shift(1).rolling(window).std()
    )

# One-hot weekday (done on ALL so columns match across splits)
df_all = pd.get_dummies(df_all, columns=["Wochentag"], drop_first=True)

train_fe = df_all[df_all["dataset"] == "train"].copy()
val_fe   = df_all[df_all["dataset"] == "val"].copy()
test_fe  = df_all[df_all["dataset"] == "test"].copy()

print("Lag + rolling + one-hot done.")


Lag + rolling + one-hot done.


In [12]:
weekday_cols = [c for c in train_fe.columns if c.startswith("Wochentag_")]

feature_cols = [
    "Temperatur",
    "KielerWoche",
    "school_holiday",
    "public_holiday",
    "Month",
    "sin_season",
    "cos_season",
    "is_weekend",
    "lag_1",
    "lag_2",
    "lag_7",
    "lag_14",
    "roll7_mean",
    "roll7_std",
    "roll14_mean",
    "roll14_std",
    "roll30_mean",
    "roll30_std",
] + weekday_cols

target_col = "umsatz"

print("Number of features:", len(feature_cols))


Number of features: 24


In [13]:
def build_model(input_dim: int) -> tf.keras.Model:
    model = Sequential([
        Dense(64, activation="relu", input_shape=(input_dim,)),
        Dropout(0.1),
        Dense(32, activation="relu"),
        Dense(1)
    ])

    model.compile(
        optimizer="adam",
        loss="mse"
    )
    return model



In [14]:
product_groups = sorted(train_fe["warengruppe"].dropna().unique())

pred_list = []
models_by_wg = {}
results = []

for wg in product_groups:
    print("\n==============================")
    print(f" Warengruppe {wg}")
    print("==============================")

    train_wg = train_fe[train_fe["warengruppe"] == wg].copy()
    val_wg   = val_fe[val_fe["warengruppe"] == wg].copy()
    test_wg  = test_fe[test_fe["warengruppe"] == wg].copy()

    # Drop rows without target in train/val
    train_wg = train_wg.dropna(subset=[target_col])
    val_wg   = val_wg.dropna(subset=[target_col])

    # Drop rows with missing FEATURES in train/val only
    train_wg = train_wg.dropna(subset=feature_cols)
    val_wg   = val_wg.dropna(subset=feature_cols)

    # DO NOT drop test rows

    print(f"Rows: train={len(train_wg)}, val={len(val_wg)}, test={len(test_wg)}")

    if len(train_wg) < 50 or len(val_wg) < 20:
        print("⚠️ Too few rows for stable NN training → skipping this WG.")
        continue

    if len(test_wg) == 0:
        print("⚠️ No test rows for this WG → skipping prediction.")
        continue

    # ✅ Build X/y AFTER filtering
    X_train = train_wg[feature_cols].to_numpy()
    y_train = train_wg[target_col].to_numpy()

    X_val   = val_wg[feature_cols].to_numpy()
    y_val   = val_wg[target_col].to_numpy()

    X_test  = test_wg[feature_cols].to_numpy()

    # ✅ Impute
    imputer = SimpleImputer(strategy="mean")
    X_train = imputer.fit_transform(X_train)
    X_val   = imputer.transform(X_val)
    X_test  = imputer.transform(X_test)

    # ✅ Scale
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val   = scaler.transform(X_val)
    X_test  = scaler.transform(X_test)

    # Train
    model = build_model(input_dim=X_train.shape[1])

    es = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=200,
        batch_size=32,
        callbacks=[es],
        verbose=0
    )

    # Evaluate
    y_train_pred = model.predict(X_train, verbose=0).ravel()
    y_val_pred   = model.predict(X_val, verbose=0).ravel()

    r2_train = r2_score(y_train, y_train_pred)
    r2_val   = r2_score(y_val, y_val_pred)

    mse_train = mean_squared_error(y_train, y_train_pred)
    mae_train = mean_absolute_error(y_train, y_train_pred)

    mse_val = mean_squared_error(y_val, y_val_pred)
    mae_val = mean_absolute_error(y_val, y_val_pred)

    print(f"NN R² (train): {r2_train:.3f} | MSE: {mse_train:.2f} | MAE: {mae_train:.2f}")
    print(f"NN R² (val):   {r2_val:.3f} | MSE: {mse_val:.2f} | MAE: {mae_val:.2f}")

    results.append({
    "warengruppe": wg,
    "n_val": len(val_wg),
    "r2_train": r2_train,
    "r2_val": r2_val,
    "mse_train": mse_train,
    "mae_train": mae_train,
    "mse_val": mse_val,
    "mae_val": mae_val
})


    # Predict test
    y_test_pred = model.predict(X_test, verbose=0).ravel()

    pred_list.append(pd.DataFrame({
        "id": test_wg["id"].values,
        "umsatz_Prediction": y_test_pred
    }))

    models_by_wg[wg] = (model, scaler, imputer)

# ==============================
# Combined (weighted) evaluation
# ==============================
results_df = pd.DataFrame(results).sort_values("warengruppe")

weighted_r2_train = (
    (results_df["r2_train"] * results_df["n_val"]).sum()
    / results_df["n_val"].sum()
)

weighted_r2_val = (
    (results_df["r2_val"] * results_df["n_val"]).sum()
    / results_df["n_val"].sum()
)

print("\n==============================")
print(" Combined (weighted) metrics ")
print("==============================")
print(f"Weighted R² (train): {weighted_r2_train:.3f}")
print(f"Weighted R² (val):   {weighted_r2_val:.3f}")

display(results_df)  # optional (notebook only)


# Submission
if len(pred_list) == 0:
    raise ValueError("No predictions generated. Check test split and IDs.")

submission = pd.concat(pred_list, ignore_index=True)
submission = submission.dropna(subset=["id"]).copy()
submission["id"] = submission["id"].astype(int)
submission = submission.sort_values("id")
submission.to_csv("submission_neural_net.csv", index=False)

print("\nSaved: submission_neural_net.csv")
submission.head()


 Warengruppe 1
Rows: train=1432, val=357, test=355


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2026-01-06 13:33:04.456461: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


NN R² (train): 0.629 | MSE: 590.07 | MAE: 17.95
NN R² (val):   0.526 | MSE: 849.74 | MAE: 21.77

 Warengruppe 2
Rows: train=1432, val=357, test=355


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


NN R² (train): 0.880 | MSE: 2274.56 | MAE: 32.44
NN R² (val):   0.890 | MSE: 1770.26 | MAE: 32.32

 Warengruppe 3
Rows: train=1432, val=357, test=355


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


NN R² (train): 0.864 | MSE: 734.83 | MAE: 18.87
NN R² (val):   0.868 | MSE: 757.80 | MAE: 20.65

 Warengruppe 4
Rows: train=1379, val=357, test=354


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


NN R² (train): 0.596 | MSE: 568.07 | MAE: 16.80
NN R² (val):   0.018 | MSE: 687.28 | MAE: 20.16

 Warengruppe 5
Rows: train=1432, val=357, test=355


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


NN R² (train): 0.321 | MSE: 7059.57 | MAE: 37.55
NN R² (val):   0.205 | MSE: 6171.55 | MAE: 44.50

 Warengruppe 6
Rows: train=188, val=55, test=56


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


NN R² (train): 0.501 | MSE: 544.53 | MAE: 17.81
NN R² (val):   0.485 | MSE: 501.87 | MAE: 17.58

 Combined (weighted) metrics 
Weighted R² (train): 0.653
Weighted R² (val):   0.501


Unnamed: 0,warengruppe,n_val,r2_train,r2_val,mse_train,mae_train,mse_val,mae_val
0,1,357,0.628801,0.526486,590.073058,17.950625,849.735055,21.771578
1,2,357,0.879963,0.889854,2274.560588,32.436344,1770.257729,32.316728
2,3,357,0.864297,0.867996,734.83001,18.870243,757.795823,20.651716
3,4,357,0.595968,0.017907,568.071318,16.798568,687.276524,20.161902
4,5,357,0.321235,0.204896,7059.56546,37.549809,6171.549949,44.504402
5,6,55,0.501357,0.4851,544.529983,17.812382,501.874114,17.576968



Saved: submission_neural_net.csv


Unnamed: 0,id,umsatz_Prediction
0,1808011,140.604126
355,1808012,624.095398
710,1808013,318.323212
1065,1808014,78.437088
1419,1808015,284.120148
