In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')
import pickle

In [2]:
df_recycle=pd.read_csv("WasteManagementAndOverallRecyclingRatesAnnual.csv")
df_recycle.head()

Unnamed: 0,DataSeries,2024,2023,2022,2021,2020,2019,2018,2017,2016,...,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000
0,Total Generated,6658000,6859000,7385000,6944000,5880000,7278000,7759000,7704000,7814000,...,6114000,5970000,5601000,5221000,5018000,4790000,4728000,4809000,5035000,4643000
1,Construction & Demolition,708000,832000,1424000,1013000,825000,1440000,1624000,1609000,1595000,...,1176000,922000,778000,619000,493000,509000,423000,407000,351000,319000
2,Ferrous Metal,1275000,1296000,1338000,1312000,934000,1278000,1269000,1379000,1358000,...,872000,785000,737000,726000,818000,720000,857000,840000,1115000,989000
3,Non-Ferrous Metal,103000,106000,92000,88000,75000,126000,171000,94000,97000,...,58000,85000,92000,87000,89000,87000,94000,101000,119000,90000
4,Used Slag,112000,176000,169000,182000,106000,174000,246000,273000,251000,...,422000,566000,527000,471000,382000,267000,260000,298000,227000,219000


In [3]:
df_recycle.columns=df_recycle.columns.astype(str).str.strip()

In [4]:
cols_year = [c for c in df_recycle.columns if c.isdigit()]
print(cols_year[:5], "...", cols_year[-5:])

['2024', '2023', '2022', '2021', '2020'] ... ['2004', '2003', '2002', '2001', '2000']


In [5]:
df_long = df_recycle.melt(
    id_vars=["DataSeries"],
    value_vars=cols_year,
    var_name="Year",
    value_name="Value"
)


In [6]:
df_long["Year"] = df_long["Year"].astype(int)
df_long["DataSeries"] = df_long["DataSeries"].str.strip()
df_long["Value"] = pd.to_numeric(df_long["Value"], errors="coerce")
df_long["Value"] = df_long["Value"].ffill()

In [7]:
df_long = df_long.sort_values(  ["DataSeries", "Year"]).reset_index(drop=True)

In [8]:
totals = df_long[
    df_long["DataSeries"].isin([
        "Total Generated",
        "Total Recycled",
        "Total Disposed"
    ])
].copy()

In [9]:
total_generated = df_long[df_long["DataSeries"] == "Total Generated"]
total_disposed = df_long[df_long["DataSeries"] == "Total Disposed"]

total_recycled = df_long[df_long["DataSeries"] == "Total Recycled"]

In [10]:
total_generated = (
    df_long[df_long["DataSeries"] == "Total Generated"]
    .sort_values("Year")
    .set_index("Year")["Value"]   
    .astype(float)
)


In [11]:
total_disposed = (
    df_long[df_long["DataSeries"] == "Total Disposed"]
    .sort_values("Year")
    .set_index("Year")["Value"]   
    .astype(float)
)

In [12]:
total_recycled = (
    df_long[df_long["DataSeries"] == "Total Recycled"]
    .sort_values("Year")
    .set_index("Year")["Value"]   
    .astype(float)
)

In [13]:


def split_train_test(data, test_size=3):
    train = data.iloc[:-test_size]
    test = data.iloc[-test_size:]
    return train, test

def evaluate_model(actual, predicted):
    mae = float(mean_absolute_error(actual, predicted))
    rmse = float(np.sqrt(mean_squared_error(actual, predicted)))
    mape = float(np.mean(np.abs((actual - predicted) / actual)) * 100)

    return {
        "MAE": round(mae, 2),
        "RMSE": round(rmse, 2),
        "MAPE": round(mape, 2)
    }

In [14]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

train, test = split_train_test(total_generated, test_size=3)

ses_model = SimpleExpSmoothing(train)

ses_fit = ses_model.fit()
ses_preds = ses_fit.forecast(len(test))
ses_preds.index = test.index   

ses = evaluate_model(test, ses_preds)

ses_full_model = SimpleExpSmoothing(total_generated
  
)

ses_full_fit = ses_full_model.fit()

forecast_years = 6
ses_future_forecast = ses_full_fit.forecast(steps=forecast_years)


print("SES METRICS")
ses



SES METRICS


  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(


{'MAE': 251028.79, 'RMSE': 317365.38, 'MAPE': 3.52}

In [15]:
from statsmodels.tsa.arima.model import ARIMA

train, test = split_train_test(total_disposed, test_size=3)

arima_model = ARIMA(train, order=(1, 1, 1))
arima_fit = arima_model.fit()
test_forecast = arima_fit.forecast(steps=len(test))
test_forecast.index = test.index
arima_metrics = evaluate_model(test, test_forecast)


arima_full = ARIMA(total_disposed, order=(1, 1, 1)).fit()
arima_forecast= arima_full.forecast(6)


print("ARIMA Metrics:")
arima_metrics


ARIMA Metrics:


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(


{'MAE': 168199.08, 'RMSE': 178784.73, 'MAPE': 5.1}

In [16]:
train, test = split_train_test(total_recycled, test_size=3)
weights = np.array([0.5, 0.3, 0.2])
no_weights = len(weights)

wma_preds = []
history = train.copy()

for _ in range(len(test)):
    pred = np.sum(weights * history[-no_weights:])
    wma_preds.append(pred)
    history = np.append(history, pred)

wma_metrics = evaluate_model(test, np.array(wma_preds))

    # Future forecast
future = history[-no_weights:]
wma_forecast = []
for _ in range(6):
    pred = np.sum(weights * future)
    wma_forecast.append(pred)
    future = np.append(future[1:], pred)

print("WMA METRICS")
wma_metrics

WMA METRICS


{'MAE': 301792.67, 'RMSE': 328706.86, 'MAPE': 8.25}

In [17]:
def calculate_waste_generated(
    population,
    per_capita_kg_per_day,
    gdp_billion,
    per_gdp_tonnes_per_day,
    days
):

    # Domestic waste (kg → tonnes)
    domestic_tonnes = (
        per_capita_kg_per_day * population * days
    ) / 1000

    # Non-domestic waste (already in tonnes)
    non_domestic_tonnes = (
        per_gdp_tonnes_per_day * gdp_billion * days
    )

    total_tonnes = domestic_tonnes + non_domestic_tonnes

    return {
        "domestic_generated_tonnes": round(domestic_tonnes, 2),
        "non_domestic_generated_tonnes": round(non_domestic_tonnes, 2),
        "total_generated_tonnes": round(total_tonnes, 2)
    }


In [18]:
result = calculate_waste_generated(
    population=6195860,
    per_capita_kg_per_day=0.85,
    gdp_billion=1.92,              # daily GDP equivalent
    per_gdp_tonnes_per_day=23,
    days=31
)

print(result)

total_generated_tonnes=result["total_generated_tonnes"]


{'domestic_generated_tonnes': 163260.91, 'non_domestic_generated_tonnes': 1368.96, 'total_generated_tonnes': 164629.87}


In [19]:
final_forecasts = {
    "SES": ses_future_forecast,
    "ARIMA": arima_forecast,
    "WMA": wma_forecast,
    "Total_tonnes":total_generated_tonnes
}
print(final_forecasts)

{'SES': 25    6.658000e+06
26    6.658000e+06
27    6.658000e+06
28    6.658000e+06
29    6.658000e+06
30    6.658000e+06
dtype: float64, 'ARIMA': 25    3.327537e+06
26    3.324649e+06
27    3.322780e+06
28    3.321570e+06
29    3.320787e+06
30    3.320281e+06
Name: predicted_mean, dtype: float64, 'WMA': [np.float64(3690935.6), np.float64(3578442.52), np.float64(3696278.184), np.float64(3658256.1928), np.float64(3629755.95376), np.float64(3671567.140592)], 'Total_tonnes': 164629.87}


In [20]:
with open("forecasts1.pkl", "wb") as f:
    pickle.dump(final_forecasts, f)