In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input

In [2]:
# Creating a sample dataset
np.random.seed(42)
start_date = datetime(2023, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(365)]  # One year of data
products = np.random.choice(['Mountain', 'Road', 'Hybrid'], size=365)
quantities = np.random.randint(5, 30, size=365)  # Daily sales
prices = np.random.normal(1, 0.1, size=365) * np.where(products == 'Mountain', 500, np.where(products == 'Road', 800, 650))

df = pd.DataFrame({
    'date_ordered': dates,
    'product': products,
    'quantity': quantities,
    'price': prices
})

# Aggregate data to weekly sales
df['week'] = df['date_ordered'].dt.to_period('W')
weekly_sales = df.groupby(['week', 'product']).agg({'quantity': 'sum', 'price': 'mean'}).reset_index()
weekly_sales['date_ordered'] = weekly_sales['week'].dt.start_time
weekly_sales = weekly_sales.drop('week', axis=1)
print(df)

    date_ordered   product  quantity        price                   week
0     2023-01-01    Hybrid        12   551.552635  2022-12-26/2023-01-01
1     2023-01-02  Mountain        18   533.982983  2023-01-02/2023-01-08
2     2023-01-03    Hybrid        28   642.018731  2023-01-02/2023-01-08
3     2023-01-04    Hybrid        22   692.178061  2023-01-02/2023-01-08
4     2023-01-05  Mountain        19   539.013758  2023-01-02/2023-01-08
..           ...       ...       ...          ...                    ...
360   2023-12-27      Road        21   856.929812  2023-12-25/2023-12-31
361   2023-12-28    Hybrid        13   563.371609  2023-12-25/2023-12-31
362   2023-12-29      Road         5  1020.860955  2023-12-25/2023-12-31
363   2023-12-30      Road        25   782.745158  2023-12-25/2023-12-31
364   2023-12-31    Hybrid        24   628.987756  2023-12-25/2023-12-31

[365 rows x 5 columns]


In [3]:

try:
    # For newer versions of scikit-learn
    column_transformer = ColumnTransformer([
        ('ohe', OneHotEncoder(sparse_output=False), ['product']),
        ('scaler', StandardScaler(), ['quantity', 'price'])
    ])
except TypeError:
    # For older versions of scikit-learn
    column_transformer = ColumnTransformer([
        ('ohe', OneHotEncoder(sparse=False), ['product']),
        ('scaler', StandardScaler(), ['quantity', 'price'])
    ])

# Fit and transform the data
data = column_transformer.fit_transform(weekly_sales)

n_features = data.shape[1]

# Create sequences (use last 4 weeks to predict the next week)
def create_sequences(data, n_steps):
    X, y = [], []
    for i in range(len(data) - n_steps):
        end_ix = i + n_steps
        seq_x = data[i:end_ix]
        seq_y = data[end_ix, 1:4]  # Predict quantities for all 3 products
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

n_steps = 4
X, y = create_sequences(data, n_steps)

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model definition
model = Sequential([
    Input(shape=(n_steps, n_features)),
    LSTM(64, activation='relu', return_sequences=True),
    LSTM(32, activation='relu'),
    Dense(3)  # Output layer with 3 units (one for each product)
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Fit the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1)

# Model evaluation
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# Make predictions
predictions = model.predict(X_test)


Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 149ms/step - loss: 0.5811 - mae: 0.5127 - val_loss: 0.4147 - val_mae: 0.4660
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.5531 - mae: 0.4993 - val_loss: 0.4048 - val_mae: 0.4694
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.5866 - mae: 0.5236 - val_loss: 0.3956 - val_mae: 0.4731
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.5599 - mae: 0.5183 - val_loss: 0.3859 - val_mae: 0.4770
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.5486 - mae: 0.5069 - val_loss: 0.3760 - val_mae: 0.4814
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.5374 - mae: 0.5129 - val_loss: 0.3655 - val_mae: 0.4863
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.5186 - mae: 

In [4]:
def inverse_transform_predictions(column_transformer, X_test, predictions):
    # Get the OneHotEncoder and StandardScaler from the ColumnTransformer
    ohe = column_transformer.named_transformers_['ohe']
    scaler = column_transformer.named_transformers_['scaler']
    
    print(f"Shape of X_test: {X_test.shape}")
    print(f"Shape of predictions: {predictions.shape}")
    
    # Inverse transform the one-hot encoded product
    product_inverse = ohe.inverse_transform(X_test[:, -1, :3])
    print(f"Shape of product_inverse: {product_inverse.shape}")
    
    # Inverse transform the scaled quantities
    try:
        quantities_inverse = scaler.inverse_transform(predictions)
        print(f"Shape of quantities_inverse: {quantities_inverse.shape}")
    except ValueError as e:
        print(f"Error in inverse_transform: {e}")
        print(f"Shape of scaler.scale_: {scaler.scale_.shape}")
        print(f"Shape of scaler.mean_: {scaler.mean_.shape}")
    
    # For debugging, return the shapes
    return {
        'X_test_shape': X_test.shape,
        'predictions_shape': predictions.shape,
        'product_inverse_shape': product_inverse.shape,
    }

# Use the updated function
debug_info = inverse_transform_predictions(column_transformer, X_test, predictions)
print("Debug info:", debug_info)

Shape of X_test: (29, 4, 5)
Shape of predictions: (29, 3)
Shape of product_inverse: (29, 1)
Error in inverse_transform: operands could not be broadcast together with shapes (29,3) (2,) (29,3) 
Shape of scaler.scale_: (2,)
Shape of scaler.mean_: (2,)
Debug info: {'X_test_shape': (29, 4, 5), 'predictions_shape': (29, 3), 'product_inverse_shape': (29, 1)}


In [5]:
def inverse_transform_predictions(column_transformer, X_test, predictions, dates):
    # Get the OneHotEncoder and StandardScaler from the ColumnTransformer
    ohe = column_transformer.named_transformers_['ohe']
    scaler = column_transformer.named_transformers_['scaler']
    
    # Inverse transform the one-hot encoded product
    product_inverse = ohe.inverse_transform(X_test[:, -1, :3])
    
    # Inverse transform the scaled quantities
    quantities_inverse = predictions * scaler.scale_[0] + scaler.mean_[0]
    
    # Combine the results
    result = []
    for date, prods, quants in zip(dates, product_inverse, quantities_inverse):
        result.extend((date, prod, quant) for prod, quant in zip(prods, quants))
    
    return np.array(result, dtype=object)

# Assuming you have a dates array corresponding to your predictions
# If not, you'll need to create this based on your data
dates = pd.date_range(start='2023-01-01', periods=len(predictions), freq='W')

# Use the updated function
predictions_original_scale = inverse_transform_predictions(column_transformer, X_test, predictions, dates)

print("Sample predictions (date, product, quantity):")
for date, product, quantity in predictions_original_scale[:15]:
    print(f"{date.strftime('%Y-%m-%d')}: {product}: {float(quantity):.0f}")

# Additional debugging information
print("\nShape of predictions_original_scale:", predictions_original_scale.shape)

Sample predictions (date, product, quantity):
2023-01-01: Hybrid: 54
2023-01-08: Hybrid: 52
2023-01-15: Mountain: 43
2023-01-22: Road: 57
2023-01-29: Hybrid: 56
2023-02-05: Hybrid: 56
2023-02-12: Road: 50
2023-02-19: Hybrid: 54
2023-02-26: Hybrid: 52
2023-03-05: Mountain: 52
2023-03-12: Hybrid: 52
2023-03-19: Road: 53
2023-03-26: Hybrid: 54
2023-04-02: Road: 46
2023-04-09: Hybrid: 51

Shape of predictions_original_scale: (29, 3)


In [6]:
import pandas as pd
import numpy as np
from datetime import datetime

def get_potential_influence(date):
    """Determine potential influences based on the date."""
    month = date.month
    day = date.day
    
    if month == 12 or (month == 1 and day <= 7):
        return "Christmas/New Year"
    elif month == 11 and day >= 20:
        return "Black Friday/Cyber Monday"
    elif 3 <= month <= 5:
        return "Spring Season"
    elif 6 <= month <= 8:
        return "Summer Season"
    elif month == 9:
        return "Back to School"
    else:
        return "No major influence"

def inverse_transform_predictions(column_transformer, X_test, predictions, dates):
    # Get the OneHotEncoder and StandardScaler from the ColumnTransformer
    ohe = column_transformer.named_transformers_['ohe']
    scaler = column_transformer.named_transformers_['scaler']
    
    # Inverse transform the one-hot encoded product
    product_inverse = ohe.inverse_transform(X_test[:, -1, :3])
    
    # Inverse transform the scaled quantities
    quantities_inverse = predictions * scaler.scale_[0] + scaler.mean_[0]
    
    # Combine the results
    result = []
    for date, prods, quants in zip(dates, product_inverse, quantities_inverse):
        influence = get_potential_influence(date)
        result.extend((date, prod, quant, influence) for prod, quant in zip(prods, quants))
    
    return np.array(result, dtype=object)

# Assuming you have a dates array corresponding to your predictions
# If not, you'll need to create this based on your data
dates = pd.date_range(start='2023-01-01', periods=len(predictions), freq='W')

# Use the updated function
predictions_original_scale = inverse_transform_predictions(column_transformer, X_test, predictions, dates)

print("Sample predictions (date, product, quantity, potential influence):")
for date, product, quantity, influence in predictions_original_scale[:15]:
    print(f"{date.strftime('%Y-%m-%d')}: {product}: {float(quantity):.0f} - {influence}")

# Additional debugging information
print("\nShape of predictions_original_scale:", predictions_original_scale.shape)

Sample predictions (date, product, quantity, potential influence):
2023-01-01: Hybrid: 54 - Christmas/New Year
2023-01-08: Hybrid: 52 - No major influence
2023-01-15: Mountain: 43 - No major influence
2023-01-22: Road: 57 - No major influence
2023-01-29: Hybrid: 56 - No major influence
2023-02-05: Hybrid: 56 - No major influence
2023-02-12: Road: 50 - No major influence
2023-02-19: Hybrid: 54 - No major influence
2023-02-26: Hybrid: 52 - No major influence
2023-03-05: Mountain: 52 - Spring Season
2023-03-12: Hybrid: 52 - Spring Season
2023-03-19: Road: 53 - Spring Season
2023-03-26: Hybrid: 54 - Spring Season
2023-04-02: Road: 46 - Spring Season
2023-04-09: Hybrid: 51 - Spring Season

Shape of predictions_original_scale: (29, 4)
