In [93]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [94]:
# Target years at which each fitted trend line is evaluated
future_years = [
    2023,
    2028,
    2032,
    2042,
    2050,
]

In [95]:
# Create the projection groups. Each row holds a group name, the first and
# last historic year to fit on (both inclusive), and a set of years to leave
# out of the fit.
# Empty exclusions are written as set() because {} is an empty dict, not an
# empty set; this keeps every value in pgYearsExclude the same type.
dfProjGroupsLinear = pd.DataFrame([
    ["Since 1981"         , 1981, 2021,  set()],
    ["Since 2001"         , 2001, 2021,  set()],
    ["Since 2011"         , 2011, 2021,  set()],
    ["Since 2001 w/o 2020", 2001, 2021, {2020}],
    ["Since 2011 w/o 2020", 2011, 2021, {2020}]
], columns=['pgName','pgYearFrom','pgYearTo','pgYearsExclude'])

# Render the projection-group table so the definitions can be checked by eye
display(dfProjGroupsLinear)

Unnamed: 0,pgName,pgYearFrom,pgYearTo,pgYearsExclude
0,Since 1981,1981,2021,{}
1,Since 2001,2001,2021,{}
2,Since 2011,2011,2021,{}
3,Since 2001 w/o 2020,2001,2021,{2020}
4,Since 2011 w/o 2020,2011,2021,{2020}


In [96]:
# Load the historic AADT table (created in the previous Jupyter notebook),
# pinning the dtypes of the segment key columns and of the AADT counts
dfHistoricAadt = pd.read_csv(
    'intermediate/historic-aadt.csv',
    dtype={
        'ROUTE_ID': 'string',
        'FROM_MEASURE': 'float',
        'TO_MEASURE': 'float',
        'AADT': 'int',
    },
)
dfHistoricAadt

Unnamed: 0,ROUTE_ID,FROM_MEASURE,TO_MEASURE,YEAR,AADT
0,0006,0.0,46.04,1981,325
1,0006,0.0,46.04,1982,335
2,0006,0.0,46.04,1983,430
3,0006,0.0,46.04,1984,580
4,0006,0.0,46.04,1985,585
...,...,...,...,...,...
110892,3483,0.0,1.70,2017,990
110893,3483,0.0,1.70,2018,1030
110894,3483,0.0,1.70,2019,1042
110895,3483,0.0,1.70,2020,1041


# Linear forecasts with assistance from ChatGPT
https://chat.openai.com/share/d127492a-ad78-4f45-afd0-50e29069db1a

In [97]:
# Fit one linear trend (AADT as a function of YEAR) per road segment and
# projection group, then evaluate it at the group's first year and at every
# year in future_years. The result is a long-format table with one row per
# segment / projection group / year.

# One record (dict) per forecast row; converted to a DataFrame once at the end
forecast_results_list = []

# Column order of the final table; also applied when no forecast is produced
forecast_columns = ['ROUTE_ID', 'FROM_MEASURE', 'TO_MEASURE', 'PROJ_GRP', 'YEAR', 'LIN_FORECAST']

# Segments without any usable observation are logged to this file and skipped
with open('intermediate/linear-forecasts-errors.txt', 'w') as err_file:
    # Loop through the projection groups
    for index, row in dfProjGroupsLinear.iterrows():
        pgName = row['pgName']
        pgYearFrom = row['pgYearFrom']
        pgYearTo = row['pgYearTo']
        pgYearsExclude = set(row['pgYearsExclude'])

        display('Forecasting ' + pgName + '...')

        # Years to predict are the same for every segment in this projection
        # group, so build them once here. dict.fromkeys drops a repeated year
        # (if pgYearFrom is also listed in future_years) while keeping order.
        forecast_years = list(dict.fromkeys([pgYearFrom] + future_years))
        X_forecast = np.array(forecast_years).reshape(-1, 1)

        # Group by road segment (ROUTE_ID, FROM_MEASURE, TO_MEASURE) and iterate through the groups
        for (ROUTE_ID, FROM_MEASURE, TO_MEASURE), group in dfHistoricAadt.groupby(['ROUTE_ID', 'FROM_MEASURE', 'TO_MEASURE']):

            # Keep only years inside the group's inclusive window and not excluded
            in_window = (group['YEAR'] >= pgYearFrom) & (group['YEAR'] <= pgYearTo)
            filtered_group = group[in_window & ~group['YEAR'].isin(pgYearsExclude)]

            # Nothing to fit on: log the segment and move on
            if filtered_group.empty:
                error_msg = f"Skipping empty group for ROUTE: {ROUTE_ID}, FROM: {FROM_MEASURE}, TO: {TO_MEASURE}, Projection Group: {pgName}"
                print(error_msg)
                err_file.write(error_msg + "\n")
                continue

            # scikit-learn expects a 2-D feature matrix, hence the reshape
            X = filtered_group['YEAR'].values.reshape(-1, 1)
            y = filtered_group['AADT'].values

            model = LinearRegression()
            model.fit(X, y)

            # Predict, then round the forecasted values to the nearest integers
            aadt = np.rint(model.predict(X_forecast)).astype(int)

            # Emit one long-format record per forecast year
            for year, forecast in zip(forecast_years, aadt.tolist()):
                forecast_results_list.append({
                    'ROUTE_ID': ROUTE_ID,
                    'FROM_MEASURE': FROM_MEASURE,
                    'TO_MEASURE': TO_MEASURE,
                    'PROJ_GRP': pgName,
                    'YEAR': year,
                    'LIN_FORECAST': forecast,
                })

# Build the table in one step; passing the columns explicitly keeps the
# expected layout even when every segment was skipped
forecast_results = pd.DataFrame(forecast_results_list, columns=forecast_columns)

# forecast_results now holds the forecasts for pgYearFrom and the future years,
# keyed by ROUTE_ID, FROM_MEASURE, TO_MEASURE and projection group (PROJ_GRP)


'Forecasting Since 1981...'

'Forecasting Since 2001...'

'Forecasting Since 2011...'

'Forecasting Since 2001 w/o 2020...'

'Forecasting Since 2011 w/o 2020...'

In [98]:
# Spot-check the forecasts of a single projection group
forecast_results.loc[forecast_results['PROJ_GRP'].eq('Since 1981')]

Unnamed: 0,ROUTE_ID,FROM_MEASURE,TO_MEASURE,PROJ_GRP,YEAR,LIN_FORECAST
0,0006,0.0,46.04,Since 1981,1981,465
1,0006,0.0,46.04,Since 1981,2023,357
2,0006,0.0,46.04,Since 1981,2028,344
3,0006,0.0,46.04,Since 1981,2032,334
4,0006,0.0,46.04,Since 1981,2042,308
...,...,...,...,...,...,...
28039,3483,0.0,1.70,Since 1981,2023,1257
28040,3483,0.0,1.70,Since 1981,2028,1494
28041,3483,0.0,1.70,Since 1981,2032,1684
28042,3483,0.0,1.70,Since 1981,2042,2158


In [99]:
# Write the forecasts to CSV, leaving out the DataFrame index
forecast_results.to_csv(
    path_or_buf='results/linear-forecasts.csv',
    index=False,
)