In [None]:
# Install necessary packages
!pip install streamlit pyngrok pandas matplotlib seaborn scikit-learn statsmodels xgboost lightgbm

# Imports
import os
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error, explained_variance_score
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt
import seaborn as sns
from pyngrok import ngrok

# Ngrok authentication.
# The auth token is a credential, so it is read from the NGROK_AUTH_TOKEN
# environment variable rather than being stored in the notebook source.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN")
if not ngrok_auth_token:
    raise RuntimeError(
        "NGROK_AUTH_TOKEN is not set. Export your ngrok auth token in the "
        "environment before running this cell."
    )
ngrok.set_auth_token(ngrok_auth_token)

# Open a public ngrok tunnel to local port 8501. Nothing in this visible code
# launches Streamlit itself; the tunnel only forwards to whatever listens there.
# NOTE(review): 8501 is presumably the port the Streamlit app is served on - confirm.
ngrok.kill()  # presumably stops an ngrok process left over from an earlier run - see pyngrok docs
public_url = ngrok.connect(8501)
print(f'Streamlit app running at: {public_url}')

# Save the Streamlit app to a Python file.
# NOTE: the app source below lives inside a triple-double-quoted string, so the
# embedded code must not contain a triple double quote; the embedded code is
# documented with '#' comments instead of docstrings for that reason.
with open('app.py', 'w', encoding='utf-8') as f:
    f.write("""
import os
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error, explained_variance_score
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# Load the dataset
st.title('Cyber Threat Forecasting Application')
uploaded_file = st.file_uploader('Upload your CSV file', type='csv')

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.subheader("Basic Dataset Information")
    st.write(data.head())

    if 'Date' not in data.columns:
        st.error("The dataset does not contain a 'Date' column. Please upload a valid dataset.")
    elif 'threats' not in data.columns:
        st.error("The dataset does not contain a 'threats' column. Please upload a valid dataset.")
    else:
        try:
            # Dates are expected as abbreviated month + two-digit year
            # (for example 'Jul-11'). Values that do not match become NaT
            # and their rows are dropped.
            data['Date'] = pd.to_datetime(data['Date'], format='%b-%y', errors='coerce')
            data = data.dropna(subset=['Date'])
            if data.empty:
                raise ValueError("no rows with a date in 'Mon-YY' format (for example 'Jul-11')")

            # Sort chronologically so the rolling mean, lags and differences
            # below follow time order, then forward-fill missing cells.
            data = data.set_index('Date').sort_index().ffill()

            # Data Smoothing and Resampling
            data['threats_smooth'] = data['threats'].rolling(window=5).mean()
            # Every parsed date falls on the first day of a month, so the
            # daily grid produced by resample('D') is mostly empty. The gaps
            # are interpolated because the stationarity test and the seasonal
            # decomposition below cannot work with missing values.
            # numeric_only skips text columns that cannot be averaged.
            data = data.resample('D').mean(numeric_only=True).interpolate(method='time')

            # Normalization/Standardization
            scaler = MinMaxScaler()
            data['threats_normalized'] = scaler.fit_transform(data[['threats']])
            standard_scaler = StandardScaler()
            data['threats_standardized'] = standard_scaler.fit_transform(data[['threats']])

            # Time Lagged Features
            data['lag_1'] = data['threats'].shift(1)
            data['lag_2'] = data['threats'].shift(2)
            data['lag_3'] = data['threats'].shift(3)
            data['threats_diff'] = data['threats'].diff()

            # Augmented Dickey-Fuller test (run on the first difference)
            adf_test = adfuller(data['threats_diff'].dropna())
            st.subheader("Augmented Dickey-Fuller Test")
            st.write(f"ADF Statistic: {adf_test[0]}")
            st.write(f"p-value: {adf_test[1]}")

            # Feature Engineering
            data['day_of_week'] = data.index.dayofweek
            data['month'] = data.index.month

            # Decomposition
            st.subheader('Trend, Seasonal, and Residual Decomposition')
            decomposition = seasonal_decompose(data['threats'], model='additive', period=30)
            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 6))
            decomposition.trend.plot(ax=ax1, title='Trend')
            decomposition.seasonal.plot(ax=ax2, title='Seasonal')
            decomposition.resid.plot(ax=ax3, title='Residual')
            st.pyplot(fig)
            # Streamlit reruns this script on every interaction; close the
            # figure so open figures do not pile up in memory.
            plt.close(fig)

            # Drop rows that still hold NaNs (leading rows of the rolling
            # mean, lag and difference columns).
            data.dropna(inplace=True)

            # Model features and target
            features = ['lag_1', 'lag_2', 'lag_3', 'day_of_week', 'month']
            X = data[features]
            y = data['threats']
            # shuffle=False keeps the split chronological: the last 20% of
            # the rows form the test set.
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

            # Fit `model` on the training split, then render its test-set
            # metrics, an actual-vs-predicted plot, feature importances (when
            # the model exposes them) and the error histogram under the
            # heading `model_name`. Failures are reported in the UI.
            def train_and_evaluate(model, model_name):
                try:
                    model.fit(X_train, y_train)
                    y_pred = model.predict(X_test)

                    # Metrics
                    mae = mean_absolute_error(y_test, y_pred)
                    mse = mean_squared_error(y_test, y_pred)
                    rmse = np.sqrt(mse)
                    r2 = r2_score(y_test, y_pred)
                    mape = mean_absolute_percentage_error(y_test, y_pred)
                    evs = explained_variance_score(y_test, y_pred)

                    st.subheader(f'{model_name} Performance')
                    st.write(f'MAE: {mae:.4f}')
                    st.write(f'MSE: {mse:.4f}')
                    st.write(f'RMSE: {rmse:.4f}')
                    st.write(f'R-squared: {r2:.4f}')
                    st.write(f'MAPE: {mape:.4f}')
                    st.write(f'Explained Variance Score: {evs:.4f}')

                    # Plot Actual vs Predicted
                    fig, ax = plt.subplots(figsize=(10, 5))
                    ax.plot(y_test.index, y_test, label='Actual', marker='o', linestyle='dotted')
                    ax.plot(y_test.index, y_pred, label='Predicted', marker='x', linestyle='dashed', color='red')
                    ax.set_title(f'{model_name} - Actual vs Predicted')
                    ax.set_xlabel('Date')
                    ax.set_ylabel('Threats')
                    ax.legend()
                    ax.grid()
                    st.pyplot(fig)
                    plt.close(fig)

                    # Feature importance plot (for tree-based models)
                    if hasattr(model, 'feature_importances_'):
                        feature_importance = pd.Series(model.feature_importances_, index=features)
                        st.subheader(f'{model_name} - Feature Importance')
                        st.bar_chart(feature_importance)

                    # Error Distribution Plot
                    error = y_test - y_pred
                    fig, ax = plt.subplots()
                    sns.histplot(error, kde=True, bins=20, ax=ax, color='purple')
                    ax.set_title(f'{model_name} - Error Distribution')
                    ax.set_xlabel('Prediction Error')
                    ax.set_ylabel('Frequency')
                    st.pyplot(fig)
                    plt.close(fig)
                except Exception as e:
                    st.error(f"Model training failed: {str(e)}")

            # Sidebar for Model Selection
            model_choice = st.sidebar.selectbox(
                'Select a Model',
                ['SVR', 'Random Forest', 'XGBoost', 'LightGBM', 'Decision Tree', 'Linear Regression', 'Ridge', 'Lasso']
            )

            # Model Dictionary (keys match the sidebar options above)
            models = {
                "SVR": SVR(kernel='rbf'),
                "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
                "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42),
                "LightGBM": LGBMRegressor(n_estimators=100, learning_rate=0.1, random_state=42),
                "Decision Tree": DecisionTreeRegressor(random_state=42),
                "Linear Regression": LinearRegression(),
                "Ridge": Ridge(),
                "Lasso": Lasso()
            }

            if st.sidebar.button('Run Model'):
                model = models[model_choice]
                train_and_evaluate(model, model_choice)
        except Exception as e:
            # Top-level boundary of the app: surface any pipeline failure
            # in the UI instead of a raw traceback.
            st.error(f"An error occurred: {str(e)}")
""")

Streamlit app running at: NgrokTunnel: "https://b7a4-34-23-93-253.ngrok-free.app" -> "http://localhost:8501"
