# Brew

In [2]:
!pip install pandas
!pip install pystan==2.19.1.1
!pip install prophet
!pip install boto3

Collecting pystan==2.19.1.1
  Using cached pystan-2.19.1.1.tar.gz (16.2 MB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting Cython!=0.25.1,>=0.22 (from pystan==2.19.1.1)
  Using cached cython-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.5 kB)
Using cached cython-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
Building wheels for collected packages: pystan
  Building wheel for pystan (setup.py) ... [?25lerror
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[3 lines of output][0m
  [31m   [0m   self.version = node.value.s
  [31m   [0m Cython>=0.22 and NumPy are required.
  [31m   [0m [31m[end of output][0m
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
[31m  ERROR: Failed building wheel for pystan[0m[31m
[

In [3]:
import pandas as pd
import boto3
from prophet import Prophet
import numpy as np
import os


In [4]:
# Location of the raw sales export in S3 - change both values for your setup.
bucket = 'phppos-retail-data'
input_key = 'phppos_sales.csv'

# Local scratch copy; the load cell reads the CSV from this path.
local_file = '/tmp/phppos_sales.csv'

# Fetch the object from S3 into the scratch path.
s3 = boto3.client('s3')
s3.download_file(Bucket=bucket, Key=input_key, Filename=local_file)

In [5]:
# 2. LOAD DATA

# Zero-based positions of the columns that are read as plain strings.
cols_as_str = [5, 12, 29, 37, 38, 40, 41, 53, 54, 63]
string_dtypes = dict.fromkeys(cols_as_str, str)

df = pd.read_csv(local_file, dtype=string_dtypes, low_memory=False)
print("Columns forced to string:", df.dtypes.iloc[cols_as_str])

# Parse the sale timestamp once so the `.dt` accessor is available below.
df['sale_time'] = pd.to_datetime(df['sale_time'])

# Total sales per calendar day, laid out as the two columns `ds` (day) and `y` (sum).
sale_day = df['sale_time'].dt.date
df_daily = df.groupby(sale_day)['total'].sum().reset_index()
df_daily.columns = ['ds', 'y']
df_daily['ds'] = pd.to_datetime(df_daily['ds'])

Columns forced to string: discount_reason                      object
auth_code                            object
deleted_taxes                        object
exchange_currency_symbol             object
exchange_currency_symbol_location    object
exchange_thousands_separator         object
exchange_decimal_point               object
last_modified                        object
override_taxes                       object
updated_at                           object
dtype: object


In [6]:
# 3. FORECAST FUNCTION

def forecast_original(df_raw, periods: int = 30,
                      outlier_cap: float = 30_000,
                      recent_months: int = 3,
                      dup_factor: int = 3,
                      history_months: int = 9):
    """Forecast daily sales totals with Prophet from raw sale rows.

    Sale rows are summed per calendar day, days whose total reaches
    ``outlier_cap`` are discarded, and only the trailing ``history_months``
    (counted back from the latest remaining day) are kept. The trailing
    ``recent_months`` of that window are then appended ``dup_factor`` more
    times, so recent days appear ``dup_factor + 1`` times in the training
    data and weigh more heavily in the fit.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Must contain ``sale_time`` (anything ``pd.to_datetime`` accepts) and
        ``total``. The frame is not modified.
    periods : int, default 30
        Number of daily steps to forecast past the latest training day.
    outlier_cap : float, default 30_000
        Daily totals greater than or equal to this value are dropped.
    recent_months : int, default 3
        Length of the up-weighted trailing window, in months.
    dup_factor : int, default 3
        How many extra copies of the recent window are appended.
    history_months : int, default 9
        Length of the training window, in months.

    Returns
    -------
    (fc_future, summary)
        ``fc_future`` holds the forecast rows dated after the last training
        day with columns ``ds``, ``yhat``, ``yhat_lower``, ``yhat_upper``.
        ``summary`` is a dict with ``avg_daily``, ``max_daily``,
        ``total_est``, ``total_low``, ``total_up`` and ``days``. The
        ``total_low`` / ``total_up`` values are plain sums of the per-day
        interval bounds.

    Raises
    ------
    KeyError
        If ``sale_time`` or ``total`` is missing from ``df_raw``.
    ValueError
        If fewer than two distinct days remain in the training window.
    """
    # Only two columns are used; copying just those avoids duplicating the
    # whole (wide) sales frame on every call.
    sales = df_raw[['sale_time', 'total']].copy()
    sales['sale_time'] = pd.to_datetime(sales['sale_time'])

    daily = (sales.groupby(sales['sale_time'].dt.date)['total']
                  .sum()
                  .reset_index()
                  .rename(columns={'sale_time': 'ds', 'total': 'y'}))
    daily['ds'] = pd.to_datetime(daily['ds'])
    daily = daily[daily['y'] < outlier_cap]

    if daily.empty:
        raise ValueError("No daily totals below outlier_cap; nothing to fit.")

    latest = daily['ds'].max()
    history_cut = latest - pd.DateOffset(months=history_months)
    df_hist = daily[daily['ds'] >= history_cut]

    # Check distinct days *before* duplication: the copies appended below
    # inflate the row count, so a one-day history would otherwise reach the
    # model looking like several observations.
    if len(df_hist) < 2:
        raise ValueError(
            "Need at least 2 distinct days of sales in the training window; "
            f"got {len(df_hist)}."
        )

    recent_cut = latest - pd.DateOffset(months=recent_months)
    df_recent = df_hist[df_hist['ds'] >= recent_cut]
    df_weighted = pd.concat([df_hist] + [df_recent] * dup_factor,
                            ignore_index=True)

    m = Prophet(interval_width=0.85,
                daily_seasonality=False,
                changepoint_prior_scale=0.8,
                changepoint_range=0.98,
                seasonality_mode='multiplicative')
    m.fit(df_weighted)

    future = m.make_future_dataframe(periods=periods, freq='D')
    forecast = m.predict(future)
    # Keep only the rows that lie beyond the training data.
    fc_future = forecast[forecast['ds'] > df_weighted['ds'].max()][
        ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
    ]

    summary = dict(
        avg_daily = fc_future['yhat'].mean(),
        max_daily = fc_future['yhat'].max(),
        total_est = fc_future['yhat'].sum(),
        total_low = fc_future['yhat_lower'].sum(),
        total_up  = fc_future['yhat_upper'].sum(),
        days      = len(fc_future)
    )
    return fc_future, summary

In [7]:
# 4. PER-LOCATION FORECASTING
# ————————————————————————————————————————————
# Distinct location ids in first-seen order. Missing ids are dropped:
# `df['location_id'] == NaN` never matches a row, so a NaN id would hand an
# empty frame to forecast_original and abort the whole loop.
location_ids = df['location_id'].dropna().unique()
forecasts_original = {}
summaries_original = {}

# One independent forecast per location.
# NOTE(review): each forecast starts after that location's own latest sale
# day, so the 30-day horizons of different locations need not cover the same
# dates - confirm this is intended.
for loc in location_ids:
    df_loc = df[df['location_id'] == loc]
    fc, sm = forecast_original(df_loc, periods=30)
    forecasts_original[loc] = fc
    summaries_original[loc] = sm

# Forecast over every row regardless of location; only its summary is stored
# (under the key 'ALL'), the detailed frame stays in `fc_total`.
fc_total, sm_total = forecast_original(df, periods=30)
summaries_original['ALL'] = sm_total

07:51:51 - cmdstanpy - INFO - Chain [1] start processing
07:51:51 - cmdstanpy - INFO - Chain [1] done processing
07:51:52 - cmdstanpy - INFO - Chain [1] start processing
07:51:54 - cmdstanpy - INFO - Chain [1] done processing
07:51:54 - cmdstanpy - INFO - Chain [1] start processing
07:51:54 - cmdstanpy - INFO - Chain [1] done processing


In [8]:
# 5. PRINT RESULTS
# ————————————————————————————————————————————
# One flat record per summary; the 'ALL' entry is relabelled for display.
all_summaries = [
    {'location': 'ALL LOCATIONS' if loc == 'ALL' else loc, **sm}
    for loc, sm in summaries_original.items()
]

# Human-readable report, one block per entry in insertion order.
for loc, sm in summaries_original.items():
    label = 'ALL LOCATIONS' if loc == 'ALL' else loc
    # The stock recommendation is 95% of the best-estimate total.
    planned_stock = sm['total_est'] * 0.95
    print(f"""
📍 Location {label}
📊 Sales Forecast Summary (Next {sm['days']} Days):

- 📈 Average daily forecasted sales: ${sm['avg_daily']:,.0f}
- 🔺 Highest predicted daily sales: ${sm['max_daily']:,.0f}

- 💰 Total expected sales (best estimate): ${sm['total_est']:,.0f}
- 📉 Lower bound (cautious estimate): ${sm['total_low']:,.0f}
- 🔝 Upper bound (optimistic estimate): ${sm['total_up']:,.0f}

🛒 Recommended inventory plan:
Prepare stock for around ${planned_stock:,.0f} in sales and monitor performance weekly.
""")



📍 Location 1
📊 Sales Forecast Summary (Next 30 Days):

- 📈 Average daily forecasted sales: $3,395
- 🔺 Highest predicted daily sales: $5,121

- 💰 Total expected sales (best estimate): $101,859
- 📉 Lower bound (cautious estimate): $39,443
- 🔝 Upper bound (optimistic estimate): $163,619

🛒 Recommended inventory plan:
Prepare stock for around $96,766 in sales and monitor performance weekly.


📍 Location 2
📊 Sales Forecast Summary (Next 30 Days):

- 📈 Average daily forecasted sales: $35
- 🔺 Highest predicted daily sales: $111

- 💰 Total expected sales (best estimate): $1,042
- 📉 Lower bound (cautious estimate): $1,024
- 🔝 Upper bound (optimistic estimate): $1,067

🛒 Recommended inventory plan:
Prepare stock for around $990 in sales and monitor performance weekly.


📍 Location ALL LOCATIONS
📊 Sales Forecast Summary (Next 30 Days):

- 📈 Average daily forecasted sales: $3,395
- 🔺 Highest predicted daily sales: $5,121

- 💰 Total expected sales (best estimate): $101,859
- 📉 Lower bound (cautiou

In [9]:
# 6. SAVE SUMMARIES/FORECASTS TO S3 (results only)
# ————————————————————————————————————————————
# Summary table: written to a local CSV first, then uploaded.
summary_key = 'results/sales_forecast_summary.csv'
summary_path = '/tmp/sales_forecast_summary.csv'
results_summary_df = pd.DataFrame(all_summaries)
results_summary_df.to_csv(summary_path, index=False)
s3.upload_file(summary_path, bucket, summary_key)

# Detailed forecasts: one CSV per entry in `forecasts_original`, uploaded
# under the same file name inside `results/`.
for loc, fc in forecasts_original.items():
    file_name = f'forecast_{loc}.csv'
    out_path = '/tmp/' + file_name
    fc.to_csv(out_path, index=False)
    s3.upload_file(out_path, bucket, 'results/' + file_name)

# ————————————————————————————————————————————
# 7. CLEAN UP LOCAL TEMP FILES
# ————————————————————————————————————————————
# Cleanup is currently disabled, so the CSVs written above stay in /tmp.
# Uncomment the lines below to remove them after the upload:
# os.remove(local_file)
# os.remove('/tmp/sales_forecast_summary.csv')
# for loc in forecasts_original:
#     os.remove(f'/tmp/forecast_{loc}.csv')

# print("✅ Forecast results written to S3. Local files cleaned up.")