In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import LSTM, Dense # type: ignore
from sklearn.metrics import r2_score

#parameters (steps are 15-minute intervals)
look_back=96         #input window: 96 x 15 min = 1 day of past data
future_steps=720     #forecast horizon: 720 x 15 min = 7.5 days (one week would be 672 steps)
epochs=3             #training epochs (reduced from 10)
batch_size=64        #mini-batch size, chosen for faster training

# Load the raw KPI exports; the Timestamp column is handed to pandas' date
# parser with month-first interpretation (dayfirst=False).
prb_df = pd.read_csv(
    "/content/drive/MyDrive/DL_Prb_Utilization_Data.csv",
    parse_dates=["Timestamp"],
    dayfirst=False,
)
ue_df = pd.read_csv(
    "/content/drive/MyDrive/Avg_UE_Number_Data.csv",
    parse_dates=["Timestamp"],
    dayfirst=False,
)

# Strip stray whitespace from the header names before any column lookup.
prb_df.columns = [col.strip() for col in prb_df.columns]
ue_df.columns = [col.strip() for col in ue_df.columns]

# Validate with an explicit exception instead of `assert`: assertions are
# removed under `python -O`, and the message names the offending file.
missing_nci = [
    label
    for label, frame in (("DL_Prb_Utilization", prb_df), ("Avg_UE_Number", ue_df))
    if "NCI" not in frame.columns
]
if missing_nci:
    raise ValueError(f"'NCI' column missing from: {', '.join(missing_nci)}")

def evaluate_forecast(y_true,y_pred):
    """Return the R^2 score of ``y_pred`` measured against ``y_true``.

    Both arguments are passed unchanged to ``sklearn.metrics.r2_score``.
    """
    return r2_score(y_true, y_pred)

def create_dataset(series, look_back):
    """Build sliding-window samples for one-step-ahead supervised learning.

    Sample ``i`` pairs the window ``series[i:i + look_back]`` with the target
    ``series[i + look_back]``.

    Args:
        series: Array-like with time along the first axis (1-D, or 2-D such
            as ``(n, 1)``).
        look_back: Number of consecutive steps in each input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays.  ``X`` has shape
        ``(n_samples, look_back, *series.shape[1:])`` and ``Y`` has shape
        ``(n_samples, *series.shape[1:])`` where
        ``n_samples = max(len(series) - look_back, 0)``.  When the series is
        too short the arrays are empty but keep those trailing dimensions, so
        callers can still inspect ``X.shape[1]``.
    """
    series = np.asarray(series)
    n_samples = max(len(series) - look_back, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + look_back - 1]; a single
    # fancy-indexing pass then gathers every window without a Python loop.
    window_idx = np.arange(n_samples)[:, None] + np.arange(look_back)
    X = series[window_idx]

    # Targets are the values immediately following each window (copied so the
    # result never aliases the caller's array).
    Y = series[look_back:look_back + n_samples].copy()
    return X, Y

def _prepare_series(cell_df, kpi_col):
    """Return one cell's KPI values indexed by parsed, sorted timestamps.

    Rows with a missing KPI value or an unparseable timestamp are dropped, and
    only samples from 2024-02-01 00:00 through 2025-05-01 23:45 are kept.
    """
    ts = cell_df[["Timestamp", kpi_col]].dropna()

    stamps = ts["Timestamp"]
    if not pd.api.types.is_datetime64_any_dtype(stamps):
        # Raw strings: enforce the month-first layout explicitly.
        stamps = pd.to_datetime(
            stamps.astype(str).str.strip(),
            format="%m-%d-%Y %H:%M",
            errors="coerce",
        )
    # A column that is already datetime64 is used as-is: stringifying it would
    # yield ISO text that the explicit format cannot match, turning every row
    # into NaT.
    ts = ts.assign(Timestamp=stamps).dropna(subset=["Timestamp"])

    in_range = (ts["Timestamp"] >= "2024-02-01 00:00:00") & (
        ts["Timestamp"] <= "2025-05-01 23:45:00"
    )
    return ts[in_range].set_index("Timestamp").sort_index()


def _recursive_forecast(model, seed_window, steps):
    """Roll the model forward ``steps`` times, feeding each prediction back in.

    ``seed_window`` has shape ``(1, look_back, 1)`` in scaled units; the
    result is a ``(steps, 1)`` array in the same scaled units.
    """
    window = seed_window
    predictions = []
    for _ in range(steps):
        pred = model.predict(window, verbose=0)
        predictions.append(pred[0, 0])
        # np.roll returns a copy, so the seed (and the array it views) is
        # never mutated; the oldest step is dropped and the prediction appended.
        window = np.roll(window, -1, axis=1)
        window[0, -1, 0] = pred[0, 0]
    return np.array(predictions).reshape(-1, 1)


def forecast_lstm(df, kpi_col):
    """Train one LSTM per cell and forecast ``kpi_col`` ``future_steps`` ahead.

    For every distinct "NCI" value the KPI series is cleaned, min-max scaled,
    cut into ``look_back``-long windows, split 80/20 chronologically into
    train and test sets, and fitted with a single-layer LSTM using the
    module-level ``epochs`` and ``batch_size``.  The test R^2 is recorded and
    the model is then rolled forward recursively from the last observed window.

    Cells that fail for any reason are reported on stdout and skipped so one
    bad cell does not abort the whole run.

    Args:
        df: Frame containing "NCI", "Timestamp" and ``kpi_col`` columns.
        kpi_col: Name of the KPI column to model.

    Returns:
        Tuple ``(results, forecasts)`` of DataFrames.  ``results`` has one row
        per successfully processed cell with columns "nCI", "Model", "KPI",
        "R2_Score" and "LastTimestamp".  ``forecasts`` has columns
        "Timestamp", "Forecast", "nCI" and "KPI", and is an empty frame when
        no cell could be processed.
    """
    results = []
    all_forecasts = []

    for cell in df["NCI"].unique():
        print(f"\n Processing cell {cell}")
        cell_df = df[df["NCI"] == cell].sort_values("Timestamp")
        ts = _prepare_series(cell_df, kpi_col)

        try:
            print(f"Training data from {ts.index.min()} to {ts.index.max()}")

            # At least two look-back windows are required so that the 80/20
            # split leaves a non-empty training set.
            if len(ts) - look_back < 2:
                print(
                    f"Skipping cell {cell}: only {len(ts)} usable samples, "
                    f"need at least {look_back + 2}"
                )
                continue

            values = ts.values.astype("float32")
            # NOTE(review): the scaler is fit on the full series before the
            # train/test split, so the test R^2 sees the test range's min/max.
            scaler = MinMaxScaler()
            scaled = scaler.fit_transform(values)

            X, y = create_dataset(scaled, look_back)
            X = X.reshape((X.shape[0], X.shape[1], 1))

            # Chronological split: first 80% of windows train, the rest test.
            split = int(len(X) * 0.8)
            X_train, X_test = X[:split], X[split:]
            y_train, y_test = y[:split], y[split:]

            model = Sequential()
            model.add(LSTM(50, input_shape=(look_back, 1)))
            model.add(Dense(1))
            model.compile(optimizer="adam", loss="mean_squared_error")
            # Use the module-level training parameters instead of literals.
            model.fit(
                X_train,
                y_train,
                epochs=epochs,
                batch_size=batch_size,
                verbose=0,
            )

            # Score in original KPI units.
            y_pred = model.predict(X_test)
            y_pred_inv = scaler.inverse_transform(y_pred)
            y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

            r2 = evaluate_forecast(y_test_inv.flatten(), y_pred_inv.flatten())
            results.append({
                "nCI": cell,
                "Model": "LSTM",
                "KPI": kpi_col,
                "R2_Score": r2,
                "LastTimestamp": ts.index[-1],
            })

            #forecast
            # NOTE(review): the forecast start is fixed, which assumes the
            # cell's data runs up to 2025-05-01 23:45 - confirm for cells whose
            # "LastTimestamp" is earlier.
            print("Forecasting from 2025-05-02 00:00:00 forward")
            seed_window = scaled[-look_back:].reshape(1, look_back, 1)
            forecast = scaler.inverse_transform(
                _recursive_forecast(model, seed_window, future_steps)
            )
            future_dates = pd.date_range(
                start=pd.Timestamp("2025-05-02 00:00:00"),
                periods=future_steps,
                freq="15min",
            )

            all_forecasts.append(pd.DataFrame({
                "Timestamp": future_dates,
                "Forecast": forecast.flatten(),
                "nCI": cell,
                "KPI": kpi_col,
            }))

        except Exception as e:
            # Deliberate best-effort: report the failure and move to the next cell.
            print(f"Error processing cell {cell}:{e}")

    if all_forecasts:
        return pd.DataFrame(results), pd.concat(all_forecasts, ignore_index=True)

    print("No forecasts were generated.")
    return pd.DataFrame(results), pd.DataFrame()

# Run the per-cell LSTM pipeline once for each KPI.
prb_results, forecast_prb = forecast_lstm(prb_df, "DL_Prb_Utilization")
ue_results, forecast_ue = forecast_lstm(ue_df, "Avg_UE_Number")

# Write each KPI's forecast to its own CSV on Google Drive.
for kpi_forecast, csv_path in (
    (forecast_prb, "/content/drive/MyDrive/forecast_DL_Prb_Utilization_LSTM.csv"),
    (forecast_ue, "/content/drive/MyDrive/forecast_Avg_UE_Number_LSTM.csv"),
):
    kpi_forecast.to_csv(csv_path, index=False)

# Stack both accuracy tables into a single summary file.
accuracy_summary = pd.concat([prb_results, ue_results])
accuracy_summary.to_csv(
    "/content/drive/MyDrive/model_accuracy_summary_LSTM.csv",
    index=False,
)

print("\nForecasts saved to Google Drive.")


 Processing cell 357783981
Training data from 2024-02-01 00:00:00 to 2025-05-01 23:45:00
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step
Forecasting from 2025-05-02 00:00:00 forward

 Processing cell 357783979
Training data from 2024-02-01 00:00:00 to 2025-05-01 23:45:00
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step
Forecasting from 2025-05-02 00:00:00 forward

 Processing cell 357783980
Training data from 2024-02-01 00:00:00 to 2025-05-01 23:45:00
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step
Forecasting from 2025-05-02 00:00:00 forward

 Processing cell 358531244
Training data from 2024-02-01 00:00:00 to 2025-05-01 23:45:00
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step
Forecasting from 2025-05-02 00:00:00 forward

 Processing cell 358531245
Training data from 2024-02-01 00:00:00 to 2025-05-01 23:45:00
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 