In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

## Loading dataset (synthetic sample sales data)

In [None]:
# Build a synthetic daily sales series: 100 consecutive days starting
# 2020-01-01, with Poisson-distributed counts (mean 200). The global NumPy
# seed is fixed first so the draw is reproducible.
np.random.seed(42)
sales_data = np.random.poisson(lam=200, size=100)
date_range = pd.date_range(start='2020-01-01', periods=100, freq='D')

# Assemble the frame and promote the dates to the index in one expression.
df = pd.DataFrame({'Date': date_range, 'Sales': sales_data}).set_index('Date')

# Preview the first rows.
df.head()

## Visualise sales data

In [None]:
# Line chart of the raw sales series against its date index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df.index, df['Sales'], label='Sales Data')
ax.set_title('Sales Data Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
plt.show()

## Data preprocessing

In [None]:
# Report the number of missing values per column. The result is printed
# explicitly: a bare expression that is not the last statement of a cell is
# evaluated and then discarded, so the check would otherwise show nothing.
print(df.isnull().sum())

# First-order differencing of the sales series. The first element has no
# predecessor and comes out as NaN, hence the dropna().
df_diff = df['Sales'].diff().dropna()

# Plot the differenced series.
plt.figure(figsize=(10,6))
plt.plot(df_diff, label='Differenced Sales Data')
plt.title('Differenced Sales Data Over Time')
plt.xlabel('Date')
plt.ylabel('Sales Difference')
plt.legend()
plt.show()

## Applying ARIMA for forecasting

In [None]:
# Chronological 80/20 split: the first 80% of observations train the model,
# the remainder is held out for evaluation.
train_size = int(len(df) * 0.8)
train = df['Sales'].iloc[:train_size]
test = df['Sales'].iloc[train_size:]

# ARIMA with order (p=5, d=1, q=0), fitted on the training portion only.
model = ARIMA(train, order=(5, 1, 0))
arima_model = model.fit()

print(arima_model.summary())

### Forecasting sales over the test period

In [None]:
# Forecast one step per held-out observation so the prediction lines up
# with the test set.
forecast = arima_model.forecast(steps=len(test))

# Overlay the forecast on the actual held-out values.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(test.index, test, label='Actual Sales', marker='o')
ax.plot(test.index, forecast, label='Forecasted Sales', color='red', linestyle='--')
ax.set_title('Actual vs Forecasted Sales')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
plt.show()

## Evaluating model

In [None]:
# Error metrics of the forecast against the held-out observations.
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_true=test, y_pred=forecast))
mae = mean_absolute_error(y_true=test, y_pred=forecast)

print(f'Mean Absolute Error (MAE): {mae}')
print(f'Root Mean Squared Error (RMSE): {rmse}')

## Future sales forecasting:

In [None]:
# Number of days to project beyond the last observation.
FORECAST_DAYS = 30

# Refit on the complete history. `arima_model` was fitted on the training
# split only, so its forecast() starts right after the end of `train` (i.e.
# it re-predicts the test period) rather than after the end of the data.
final_model = ARIMA(df['Sales'], order=(5, 1, 0)).fit()
future_forecast = final_model.forecast(steps=FORECAST_DAYS)

# Start one day AFTER the last observed date so that no forecast point
# is stamped with a date that already has an actual value.
future_dates = pd.date_range(
    start=df.index[-1] + pd.Timedelta(days=1),
    periods=FORECAST_DAYS,
    freq='D',
)

# Convert the forecast to a plain array so it is paired with `future_dates`
# strictly by position; otherwise the frame would take its index from the
# forecast Series while the 'Date' column comes from `future_dates`.
forecast_df = pd.DataFrame({
    'Date': future_dates,
    'Forecasted Sales': np.asarray(future_forecast),
})

# Plot the full history followed by the out-of-sample forecast.
plt.figure(figsize=(10,6))
plt.plot(df.index, df['Sales'], label='Historical Sales')
plt.plot(forecast_df['Date'], forecast_df['Forecasted Sales'], label='Future Sales Forecast', color='orange', linestyle='--')
plt.title('Sales Forecast for Next 30 Days')
plt.xlabel('Date')
plt.ylabel('Sales')
plt.legend()
plt.show()