# Local Retail Demand Forecasting for Kirana Stores

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Read the raw transaction export from disk.
file_path = "kiranaRO_train.csv"  # <-- Make sure this file is in the same folder
df = pd.read_csv(file_path)

# Parse invoice timestamps; values that cannot be parsed become NaT
# (errors='coerce') and are removed by the dropna below.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'], errors='coerce')

# Keep only rows where every field used downstream is present.
df = df.dropna(
    subset=['InvoiceDate', 'StockCode', 'Description', 'Quantity', 'UnitPrice']
)

# Keep only rows with a strictly positive quantity AND a strictly positive price.
df = df[(df['Quantity'] > 0) & (df['UnitPrice'] > 0)]

# Normalise product descriptions: trim surrounding whitespace, then lowercase.
stripped_description = df['Description'].str.strip()
df['Description'] = stripped_description.str.lower()

# Line-level revenue = units sold x price per unit.
df['TotalPrice'] = df['Quantity'].mul(df['UnitPrice'])


## Feature Engineering

In [None]:
# Collapse transactions to one row per (calendar day, product),
# summing units sold and revenue.
df['InvoiceDay'] = df['InvoiceDate'].dt.date
daily_demand = (
    df.groupby(['InvoiceDay', 'StockCode'])[['Quantity', 'TotalPrice']]
    .sum()
    .reset_index()
)

# Derive model features:
#   - InvoiceDay is converted back to datetime64 so the .dt accessor works
#   - dayofweek / month are calendar features taken from InvoiceDay
#   - StockCode is replaced by the integer code of its pandas category
# assign() evaluates keyword arguments in order, so the lambdas below see the
# already-converted InvoiceDay column.
daily_demand = daily_demand.assign(
    InvoiceDay=lambda frame: pd.to_datetime(frame['InvoiceDay']),
    dayofweek=lambda frame: frame['InvoiceDay'].dt.dayofweek,
    month=lambda frame: frame['InvoiceDay'].dt.month,
    StockCode=lambda frame: frame['StockCode'].astype('category').cat.codes,
)


## Model Training and Evaluation

In [None]:
# Model inputs (encoded product + calendar features) and the target (units sold).
features = ['StockCode', 'dayofweek', 'month']
X = daily_demand[features]
y = daily_demand['Quantity']

# Hold out 20% of the rows for evaluation, with a fixed seed for reproducibility.
# NOTE(review): train_test_split shuffles rows by default, so held-out days are
# interleaved with training days rather than coming strictly after them --
# confirm this is the intended evaluation for a forecasting task.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion (fit() returns the model).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows with root-mean-squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.2f}")

# Put actual and predicted quantities next to the test features for inspection.
results = X_test.assign(ActualQuantity=y_test, PredictedQuantity=y_pred)
results.head()


RMSE: 52.31


Unnamed: 0,StockCode,dayofweek,month,ActualQuantity,PredictedQuantity
74160,185,2,4,1,6.294083
70402,423,2,4,2,9.226667
219413,1675,4,11,2,8.825
147205,2146,6,8,1,12.591083
78841,1597,2,4,8,16.591631
