# Libraries and data importing

Importing packages

In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [24]:
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.structural import UnobservedComponents
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt, ExponentialSmoothing
from statsmodels.tsa.exponential_smoothing.ets import ETSModel

In [3]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

Importing data

In [26]:
# Load the raw activity log (one row per contributor event) from the project-relative parquet file
activities = pd.read_parquet('../data-raw/activities.parquet')
activities

Unnamed: 0,contributor,category,repository,activity,date
0,AmplabJenkins,bot,apache/spark,Commenting pull request,2022-11-25 09:55:19+00:00
1,AmplabJenkins,bot,apache/spark,Commenting pull request,2022-11-25 09:55:23+00:00
2,AmplabJenkins,bot,apache/spark,Commenting pull request,2022-11-25 09:55:26+00:00
3,analysis-bot,bot,facebook/react-native,Commenting pull request,2022-11-25 09:55:27+00:00
4,neos-bot,bot,neos/neos-ui-compiled,Pushing commits,2022-11-25 09:55:47+00:00
...,...,...,...,...,...
1015418,798388,human,879434,Reviewing code,2023-04-15 16:06:15+00:00
1015419,798388,human,879434,Reviewing code,2023-04-15 16:07:26+00:00
1015420,784775,human,643744,Creating branch,2023-04-15 16:07:33+00:00
1015421,784775,human,888378,Opening pull request,2023-04-15 16:08:07+00:00


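Keep only the activities from the three months preceding the last date in the data, keep at most the last 300 activities of each contributor, count the activities per contributor and hour, and finally keep only the contributors that are active in at least 10 distinct hours.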

In [27]:
# Build per-contributor hourly activity counts from the most recent three months of events
thresholded_activities = (
    # keep only the events from the three months leading up to the newest timestamp
    activities[activities['date'] >= activities['date'].max() - pd.DateOffset(months=3)]
    # cap every contributor at their last 300 rows
    # (assumes `activities` is ordered chronologically — TODO confirm)
    .groupby('contributor')
    .tail(300)
    # count the events of each contributor inside every hourly bin
    .groupby(['category', pd.Grouper(key='date', freq='H'), 'contributor'])['activity']
    .count()
    .reset_index(name='n_activities')
    # keep data for contributors who have at least 10 different hourly bins
    .groupby('contributor')
    .filter(lambda x: len(x) >= 10)
)
thresholded_activities

Unnamed: 0,category,date,contributor,n_activities
0,bot,2023-01-15 16:00:00+00:00,AmplabJenkins,1
1,bot,2023-01-15 17:00:00+00:00,apollo-cla,1
2,bot,2023-01-15 17:00:00+00:00,stickler-ci[bot],1
3,bot,2023-01-15 18:00:00+00:00,AmplabJenkins,2
5,bot,2023-01-15 18:00:00+00:00,mysql-oca-bot,1
...,...,...,...,...
53322,human,2023-04-15 15:00:00+00:00,947579,6
53323,human,2023-04-15 16:00:00+00:00,387854,2
53324,human,2023-04-15 16:00:00+00:00,668373,1
53325,human,2023-04-15 16:00:00+00:00,784775,4


Fill `n_activities` with zeros for the empty hours between the first and the last active hour of each contributor, and set the `category` and `contributor` columns of the added rows to the values of that contributor's group.

In [28]:
# Expand each contributor's hourly counts into a gap-free hourly series indexed by date
data = (
    thresholded_activities
    .groupby(['category','contributor'])
    .apply(
        lambda x: x
        .set_index('date')
        # resample data to fill in missing hours with zeros
        .resample('H')
        .sum()
        .fillna(0)
        .reset_index()
        # set the 'category' and 'contributor' columns to the values found in the first row of the group
        # NOTE(review): this relies on .sum() keeping the non-numeric columns — confirm for the pandas version in use
        .assign(category=lambda y: y.iloc[0]['category'], contributor=lambda y: y.iloc[0]['contributor'])
        )
    # replace the index produced by groupby/apply with an unnamed date index
    .set_index('date')
    .rename_axis(None)
)
data

Unnamed: 0,category,contributor,n_activities
2023-01-18 10:00:00+00:00,bot,0crat,1
2023-01-18 11:00:00+00:00,bot,0crat,12
2023-01-18 12:00:00+00:00,bot,0crat,3
2023-01-18 13:00:00+00:00,bot,0crat,6
2023-01-18 14:00:00+00:00,bot,0crat,2
...,...,...,...
2023-04-06 05:00:00+00:00,human,999769,0
2023-04-06 06:00:00+00:00,human,999769,0
2023-04-06 07:00:00+00:00,human,999769,2
2023-04-06 08:00:00+00:00,human,999769,0


In [None]:
# Spot-check the first 60 hourly rows of a single contributor
data[data['contributor'] == 'DrahtBot'].head(60)

# Autocorrelation function

In [None]:
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Select one contributor's hourly series for the autocorrelation example
temp = data[data['contributor'] == 'pytorchbot']
# Inspect at most one week (168 hours) of lags, bounded by roughly half the series length
lags = min(int((len(temp)/2)-1),168)

# With alpha set, acf returns a second value (the confidence intervals), which is discarded here
acf_values, _ = acf(temp['n_activities'].values, nlags=lags, alpha=0.05)

# Lag 0 is skipped; the reported lag is the one with the largest autocorrelation value
print('The optimal numer of lags is :', np.argmax(acf_values[1:])+1)

# Plot the autocorrelation function
plot_acf(temp['n_activities'].values, lags=lags)
plt.show()

In [None]:
# Select one contributor's hourly series for the partial autocorrelation example
temp = data[data['contributor'] == 'lingohub[bot]']
# Inspect at most one week (168 hours) of lags, bounded by roughly half the series length
lags = min(int((len(temp)/2)-1),168)

# With alpha set, pacf returns a second value (the confidence intervals), which is discarded here
pacf_values, _ = pacf(temp['n_activities'].values, nlags=lags, alpha=0.05, method='ols')

# Lag 0 is skipped; the reported lag is the one with the largest partial autocorrelation value
print('The optimal numer of lags is :', np.argmax(pacf_values[1:])+1)

# Plot the partial autocorrelation function
plot_pacf(temp['n_activities'].values, lags=lags)
plt.show()

# 1. Autoregressive model

Percentage of Predicted Values Greater Than or Equal to Actual Values (PGA) is a new evaluation metric that measures how often the predicted value is greater than or equal to the actual value. The implementation below returns it as a fraction in $[0, 1]$ (multiply by 100 to express it as a percentage). We can define this metric as follows:

$$PGA = \frac{1}{n}\sum_{i=1}^{n} [y_i \leq \hat{y}_i]$$

In [7]:
def pga_score(y_true, y_pred):
    """Return the fraction of predictions that are >= the matching actual value.

    Values are compared element by element in positional order, so lists,
    NumPy arrays and pandas Series (whatever their index labels) are accepted.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    float
        Share of positions where ``y_pred >= y_true``, in [0, 1];
        ``nan`` when the inputs are empty.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)

    # An empty comparison has no defined share; return NaN explicitly
    # instead of letting numpy emit a "mean of empty slice" warning.
    if actual.size == 0:
        return float('nan')

    return (predicted >= actual).mean()

Partial autocorrelation is a commonly used tool for identifying the order of an autoregressive model.

In [None]:
def ar_model(contributor):
    """Fit an autoregressive model to one contributor's series and score its forecast.

    The AR order is the lag (lag 0 excluded) with the largest partial
    autocorrelation on the training part.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the point forecast,
        the PGA of the point forecast and of the upper 95% bound, the total
        number of activities and the selected lag order.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # Search at most one week of lags, bounded by about half the training length
    max_lag = min(int((len(train) / 2) - 1), 24 * 7)
    partial_acf, _ = pacf(train['n_activities'].values, nlags=max_lag, alpha=0.05, method='ols')
    optimal_lags = np.argmax(partial_acf[1:]) + 1

    fitted = AutoReg(train['n_activities'], lags=optimal_lags).fit()

    # Forecast exactly the test horizon, with a 95% interval
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.get_prediction(start=first_step, end=last_step).summary_frame(alpha=0.05)

    actual = test['n_activities']
    point = forecast['mean']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, point),
        'mae': mean_absolute_error(actual, point),
        'mse': mean_squared_error(actual, point),
        'pga': pga_score(actual, point),
        'pga_ci_upper': pga_score(actual, forecast['mean_ci_upper']),
        'n_activities': contributor['n_activities'].sum(),
        'optimal_lags': optimal_lags
    })


In [None]:
# Run ar_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index because the
# Series already carries 'contributor' and 'category'
ar_results = data.groupby(['category', 'contributor']).apply(ar_model).reset_index(drop=True)

In [None]:
# Show the 40 contributors with the highest 'pga' for the AR point forecast
ar_results.sort_values(by='pga', ascending=False).head(40)

In [None]:
# Persist the AR metrics (one row per contributor, no index column)
ar_results.to_csv('../eval-rest/ar_model_metrics_ci.csv', index=False)

In [None]:
# Reload the AR metrics from the file this notebook writes ('ar_model_metrics_ci.csv'),
# so the cell works after a fresh "Restart & Run All" instead of depending on a
# differently named file left over from an earlier run
ar_results = pd.read_csv('../eval-rest/ar_model_metrics_ci.csv')
# Most frequently selected AR orders
display(ar_results['optimal_lags'].value_counts().to_frame().head())
# Compare bots and humans on a few selected orders (1, 2, 24 and 168) only
sns.countplot(x='optimal_lags', hue='category', data=ar_results[ar_results['optimal_lags'].isin([1, 2, 24, 168])])

# 2. Moving-average model

In [None]:
def ma_model(contributor):
    """Fit a moving-average model to one contributor's series and score its forecast.

    The MA order is the lag (lag 0 excluded) with the largest autocorrelation,
    computed on this contributor's own training data.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast, the total
        number of activities and the selected MA order.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Set the frequency of the index to hourly
    train.index.freq = 'H'

    # Choose the order from the training series itself. The previous version read
    # the notebook-level `temp` frame, i.e. another contributor's data left over
    # from the exploration cells, so every contributor got the same order.
    lags = min(int((len(train) / 2) - 1), 24 * 7)
    acf_values, _ = acf(train['n_activities'].values, nlags=lags, alpha=0.05)
    optimal_lags = np.argmax(acf_values[1:]) + 1

    # Fit the model: ARIMA with p = d = 0 is a pure MA(q) model
    model = ARIMA(train['n_activities'], order=(0, 0, optimal_lags)).fit()

    # Forecast the test set
    predictions = model.forecast(steps=len(test))

    # Create a series for evaluation metrics and sum of activities
    actual = test['n_activities']
    metrics = pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, predictions),
        'mae': mean_absolute_error(actual, predictions),
        'mse': mean_squared_error(actual, predictions),
        'n_activities': contributor['n_activities'].sum(),
        'optimal_lags': optimal_lags
    })

    return metrics

In [None]:
# Run ma_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
ma_results = data.groupby(['category', 'contributor']).apply(ma_model).reset_index(drop=True)

In [None]:
# Show the 40 contributors with the highest r2 for the MA forecast
ma_results.sort_values(by='r2', ascending=False).head(40)

In [None]:
# Persist the MA metrics (one row per contributor, no index column)
ma_results.to_csv('../eval-rest/ma_model_metrics.csv', index=False)

In [None]:
# Reload the saved MA metrics
ma_results = pd.read_csv('../eval-rest/ma_model_metrics.csv')
# Most frequently selected MA orders
display(ma_results['optimal_lags'].value_counts().to_frame().head())
# Compare bots and humans on two selected orders (2 and 24) only
sns.countplot(x='optimal_lags', hue='category', data=ma_results[ma_results['optimal_lags'].isin([2, 24])])

# 3. Autoregressive Moving-average model

In [None]:
def arma_model(contributor):
    """Exploratory ARMA(1, 1) fit for one contributor, reporting candidate orders.

    The candidate orders p (from the partial autocorrelation) and q (from the
    autocorrelation) are computed on the training data and reported in the
    result, but the fitted model always uses order (1, 0, 1).

    Note: this notebook defines `arma_model` again in a later cell; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast, the total
        number of activities and the candidate orders 'p_order' and 'q_order'.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Set the frequency of the index to hourly
    train.index.freq = 'H'

    # Choose the candidate parameters from the training series itself. The
    # previous version read the notebook-level `temp` frame, which is unrelated
    # to the contributor passed in.
    lags = min(int((len(train) / 2) - 1), 24 * 7)

    pacf_values, _ = pacf(train['n_activities'].values, nlags=lags, alpha=0.05)
    p = np.argmax(pacf_values[1:]) + 1

    acf_values, _ = acf(train['n_activities'].values, nlags=lags, alpha=0.05)
    q = np.argmax(acf_values[1:]) + 1

    print(p, q)

    # Fit the model (fixed order; p and q above are informational only)
    model = ARIMA(train['n_activities'], order=(1, 0, 1)).fit()

    # Forecast the test set
    predictions = model.forecast(steps=len(test))

    # Create a series for evaluation metrics and sum of activities
    actual = test['n_activities']
    metrics = pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, predictions),
        'mae': mean_absolute_error(actual, predictions),
        'mse': mean_squared_error(actual, predictions),
        'n_activities': contributor['n_activities'].sum(),
        'p_order': p,
        'q_order': q
    })

    # Return the metrics (previously `print(metrics)` was returned, i.e. None);
    # the call below is the last expression of the cell, so they are still shown
    return metrics

arma_model(data[data['contributor'] == 'lingohub[bot]'])

In [None]:
def arma_model(contributor):
    """Fit an ARMA(1, 1) model to one contributor's series and score its forecast.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast and the
        total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # ARIMA with d = 0 is an ARMA model; parameters are estimated with the innovations MLE
    arma = ARIMA(train['n_activities'], order=(1, 0, 1))
    fitted = arma.fit(method='innovations_mle')

    # One forecast step per held-out row
    forecast = fitted.forecast(steps=len(test))

    actual = test['n_activities']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, forecast),
        'mae': mean_absolute_error(actual, forecast),
        'mse': mean_squared_error(actual, forecast),
        'n_activities': contributor['n_activities'].sum()
    })

In [None]:
# Run arma_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
arma_results = data.groupby(['category', 'contributor']).apply(arma_model).reset_index(drop=True)

In [None]:
# Show the 40 contributors with the highest r2 for the ARMA forecast
arma_results.sort_values(by='r2', ascending=False).head(40)

In [None]:
# Persist the ARMA metrics (one row per contributor, no index column)
arma_results.to_csv('../eval-rest/arma_model_metrics.csv', index=False)

# 4. Autoregressive integrated Moving-average model

In [None]:
def arima_model(contributor):
    """Fit an ARIMA(1, 1, 1) model to one contributor's series and score its forecast.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast and the
        total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # One autoregressive term, first-order differencing, one moving-average term
    fitted = ARIMA(train['n_activities'], order=(1, 1, 1)).fit()

    # One forecast step per held-out row
    forecast = fitted.forecast(steps=len(test))

    actual = test['n_activities']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, forecast),
        'mae': mean_absolute_error(actual, forecast),
        'mse': mean_squared_error(actual, forecast),
        'n_activities': contributor['n_activities'].sum()
    })

In [None]:
# Run arima_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
arima_results = data.groupby(['category', 'contributor']).apply(arima_model).reset_index(drop=True)

In [None]:
# Show the 40 contributors with the highest r2 for the ARIMA forecast
arima_results.sort_values(by='r2', ascending=False).head(40)

In [None]:
# Persist the ARIMA metrics (one row per contributor, no index column)
arima_results.to_csv('../eval-rest/arima_model_metrics.csv', index=False)

# 5. Seasonal Autoregressive integrated Moving-average model

In [None]:
import pmdarima as pm

def sarimax_model(contributor):
    """Exploratory SARIMAX fit whose orders are selected by pmdarima's auto_arima.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast, the total
        number of activities and the selected 'order' and 'seasonal_order'.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Set the frequency of the index to hourly
    train.index.freq = 'H'

    # Use auto_arima to select the order and seasonal_order (seasonal period m=24 hours)
    optimal_model = pm.auto_arima(train['n_activities'], seasonal=True, m=24)

    # Fit the model with the selected orders
    model = SARIMAX(
        train['n_activities'],
        order=optimal_model.order,
        seasonal_order=optimal_model.seasonal_order,
    ).fit(disp=False, method='lbfgs')

    # Forecast the test set
    predictions = model.predict(start=len(train), end=len(train) + len(test) - 1)

    # Create a series for evaluation metrics and sum of activities
    actual = test['n_activities']
    metrics = pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, predictions),
        'mae': mean_absolute_error(actual, predictions),
        'mse': mean_squared_error(actual, predictions),
        'n_activities': contributor['n_activities'].sum(),
        'order' : optimal_model.order,
        'seasonal_order' : optimal_model.seasonal_order
    })

    # Return the metrics (previously `print(metrics)` was returned, i.e. None);
    # the call below is the last expression of the cell, so they are still shown
    return metrics

sarimax_model(data[data['contributor'] == 'lingohub[bot]'])

In [None]:
def sarima_model(contributor):
    """Fit a SARIMA(1, 0, 1)x(1, 0, 1, 24) model to one contributor and score its forecast.

    Note: this notebook defines `sarima_model` again in a later cell; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast and the
        total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # Seasonal period of 24 hours; stationarity and invertibility are not enforced
    sarima = SARIMAX(
        train['n_activities'],
        order=(1, 0, 1),
        seasonal_order=(1, 0, 1, 24),
        enforce_invertibility=False,
        enforce_stationarity=False,
    )
    fitted = sarima.fit(disp=False, method='lbfgs')

    # Predict exactly the positions covered by the test set
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.predict(start=first_step, end=last_step)

    actual = test['n_activities']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, forecast),
        'mae': mean_absolute_error(actual, forecast),
        'mse': mean_squared_error(actual, forecast),
        'n_activities': contributor['n_activities'].sum()
    })

In [8]:
def sarima_model(contributor):
    """Fit a SARIMA(1, 0, 1)x(1, 0, 1, 24) model and score its forecast and upper 95% bound.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the point forecast, the
        PGA of the point forecast and of the upper 95% bound, and the total
        number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # Seasonal period of 24 hours; stationarity and invertibility are not enforced
    sarima = SARIMAX(
        train['n_activities'],
        order=(1, 0, 1),
        seasonal_order=(1, 0, 1, 24),
        enforce_invertibility=False,
        enforce_stationarity=False,
    )
    fitted = sarima.fit(disp=False, method='lbfgs')

    # Forecast exactly the test horizon, with a 95% interval
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.get_prediction(start=first_step, end=last_step).summary_frame(alpha=0.05)

    actual = test['n_activities']
    point = forecast['mean']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, point),
        'mae': mean_absolute_error(actual, point),
        'mse': mean_squared_error(actual, point),
        'pga': pga_score(actual, point),
        'pga_ci_upper': pga_score(actual, forecast['mean_ci_upper']),
        'n_activities': contributor['n_activities'].sum(),
    })

In [None]:
# Run sarima_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
sarima_results = data.groupby(['category', 'contributor']).apply(sarima_model).reset_index(drop=True)

In [10]:
# Show the 40 contributors with the highest 'pga' for the SARIMA point forecast
sarima_results.sort_values(by='pga', ascending=False).head(40)

Unnamed: 0,contributor,category,r2,mae,mse,pga,pga_ci_upper,n_activities
211,renovate-approve-2[bot],bot,-15.8189,5.738432,36.328824,1.0,1.0,300
265,vscode-issue-tracker-bot,bot,1.0,0.0,0.0,1.0,1.0,300
176,ninjadotorg-bot,bot,0.0,0.003651,1.4e-05,1.0,1.0,300
690,978945,human,-0.001656,0.010458,0.004725,0.995261,0.995261,54
617,874434,human,-0.008151,0.005415,0.004778,0.995238,0.995238,16
570,784575,human,-0.037156,0.025887,0.006734,0.993464,1.0,113
438,583469,human,-0.007576,0.007519,0.007519,0.992481,0.992481,27
574,787644,human,-0.018558,0.010271,0.008417,0.991667,0.991667,71
219,runfoapp[bot],bot,-0.030671,0.020086,0.034642,0.991525,0.991525,38
712,997498,human,-0.00967,0.009679,0.009661,0.990338,0.990338,29


In [11]:
# Persist the SARIMA metrics (one row per contributor, no index column)
sarima_results.to_csv('../eval-rest/sarima_model_metrics_ci.csv', index=False)

# 6. Unobserved components model

In [None]:
def uc_model(contributor):
    """Fit an unobserved-components model (level + 24-period seasonal) and score its forecast.

    Note: this notebook defines `uc_model` again in a later cell; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast and the
        total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # Level component plus a seasonal component with a period of 24 observations
    components = UnobservedComponents(train['n_activities'], level=True, seasonal=24)
    fitted = components.fit(disp=False, method='lbfgs')

    # Predict exactly the positions covered by the test set
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.predict(start=first_step, end=last_step)

    actual = test['n_activities']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, forecast),
        'mae': mean_absolute_error(actual, forecast),
        'mse': mean_squared_error(actual, forecast),
        'n_activities': contributor['n_activities'].sum()
    })

In [13]:
def uc_model(contributor):
    """Fit an unobserved-components model and score its forecast and upper 95% bound.

    The model has a level component and a seasonal component with a period of
    24 observations.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the point forecast, the
        PGA of the point forecast and of the upper 95% bound, and the total
        number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    components = UnobservedComponents(train['n_activities'], level=True, seasonal=24)
    fitted = components.fit(disp=False, method='lbfgs')

    # Forecast exactly the test horizon, with a 95% interval
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.get_prediction(start=first_step, end=last_step).summary_frame(alpha=0.05)

    actual = test['n_activities']
    point = forecast['mean']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, point),
        'mae': mean_absolute_error(actual, point),
        'mse': mean_squared_error(actual, point),
        'pga': pga_score(actual, point),
        'pga_ci_upper': pga_score(actual, forecast['mean_ci_upper']),
        'n_activities': contributor['n_activities'].sum(),
    })

In [None]:
# Run uc_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
uc_results = data.groupby(['category', 'contributor']).apply(uc_model).reset_index(drop=True)

In [15]:
# Show the 40 contributors with the highest 'pga' for the unobserved-components point forecast
uc_results.sort_values(by='pga', ascending=False).head(40)

Unnamed: 0,contributor,category,r2,mae,mse,pga,pga_ci_upper,n_activities
70,codeclimate[bot],bot,-28.288484,17.91668,397.0217,1.0,1.0,300
237,sprucelabs-ci,bot,-24937.982375,49.92873,4676.059,1.0,1.0,300
80,delete-merged-branch[bot],bot,-114.438864,19.05896,711.1034,1.0,1.0,300
42,aws-cdk-automation,bot,-515.845704,7.472571,114.8546,1.0,1.0,300
25,adobe-bot,bot,-21.500339,4.750028,33.75051,1.0,1.0,300
47,ballerina-bot,bot,0.591159,0.7010871,1.475662,1.0,1.0,300
234,sourcegraph-bot,bot,-952.750634,10.2,152.6001,1.0,1.0,300
265,vscode-issue-tracker-bot,bot,0.0,8.511710000000001e-17,2.173476e-31,0.966667,0.966667,300
83,docker-library-bot,bot,-243.716315,4.955365,87.94493,0.875,1.0,300
71,codesandbox[bot],bot,-8.179201,5.110203,36.34214,0.857143,1.0,300


In [16]:
# Persist the unobserved-components metrics (one row per contributor, no index column)
uc_results.to_csv('../eval-rest/uc_model_metrics_ci.csv', index=False)

# 7. Simple exponential smoothing model

In [None]:
def ses_model(contributor):
    """Fit simple exponential smoothing with a fixed smoothing level of 0.8 and score its forecast.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast and the
        total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # The smoothing level is supplied rather than optimised (optimized=False)
    smoother = SimpleExpSmoothing(train['n_activities'])
    fitted = smoother.fit(
        smoothing_level=0.8,
        optimized=False,
        method='L-BFGS-B',
        remove_bias=True,
    )

    # Predict exactly the positions covered by the test set
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.predict(start=first_step, end=last_step)

    actual = test['n_activities']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, forecast),
        'mae': mean_absolute_error(actual, forecast),
        'mse': mean_squared_error(actual, forecast),
        'n_activities': contributor['n_activities'].sum()
    })

In [None]:
# Run ses_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
ses_results = data.groupby(['category', 'contributor']).apply(ses_model).reset_index(drop=True)

In [None]:
# Show the 40 contributors with the highest r2 for the simple exponential smoothing forecast
ses_results.sort_values(by='r2', ascending=False).head(40)

In [None]:
# Persist the simple exponential smoothing metrics (one row per contributor, no index column)
ses_results.to_csv('../eval-rest/ses_model_metrics.csv', index=False)

# 8. Holt’s linear (double) exponential smoothing model

In [None]:
def des_model(contributor):
    """Fit Holt's linear (double) exponential smoothing and score its forecast.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast and the
        total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Declare the training index as hourly
    train.index.freq = 'H'

    # Initial states are estimated; parameters are optimised with L-BFGS-B
    holt = Holt(train['n_activities'], initialization_method='estimated')
    fitted = holt.fit(method='L-BFGS-B')

    # Predict exactly the positions covered by the test set
    first_step = len(train)
    last_step = len(train) + len(test) - 1
    forecast = fitted.predict(start=first_step, end=last_step)

    actual = test['n_activities']
    return pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, forecast),
        'mae': mean_absolute_error(actual, forecast),
        'mse': mean_squared_error(actual, forecast),
        'n_activities': contributor['n_activities'].sum()
    })

In [None]:
# Run des_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
des_results = data.groupby(['category', 'contributor']).apply(des_model).reset_index(drop=True)

In [None]:
# Show the 40 contributors with the highest r2 for Holt's linear forecast
des_results.sort_values(by='r2', ascending=False).head(40)

In [None]:
# Persist the double exponential smoothing metrics (one row per contributor, no index column)
des_results.to_csv('../eval-rest/des_model_metrics.csv', index=False)

# 9. Holt-Winters (triple) exponential smoothing model

In [None]:
def tes_model(contributor):
    """Fit Holt-Winters (triple) exponential smoothing and score its forecast.

    An additive-trend, additive-seasonal model with 24 seasonal periods is
    tried first; if building or fitting it raises ValueError, a plain
    ExponentialSmoothing model without trend or seasonality is used instead.
    Any other exception propagates to the caller.

    Note: this notebook defines `tes_model` again in a later cell; whichever
    definition was executed last is the one in effect.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the forecast and the
        total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Set the frequency of the index to hourly
    train.index.freq = 'H'

    # Fit the model. Only ValueError triggers the fallback; the former bare
    # `except:` printed a message and left `model` undefined, which hid the real
    # error behind an UnboundLocalError (and also swallowed KeyboardInterrupt).
    try:
        model = ExponentialSmoothing(train['n_activities'], seasonal_periods=24, trend='add', seasonal='add').fit()
    except ValueError:
        model = ExponentialSmoothing(train['n_activities']).fit()

    # Forecast the test set
    predictions = model.predict(start=len(train), end=len(train) + len(test) - 1)

    # Create a series for evaluation metrics and sum of activities
    actual = test['n_activities']
    metrics = pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, predictions),
        'mae': mean_absolute_error(actual, predictions),
        'mse': mean_squared_error(actual, predictions),
        'n_activities': contributor['n_activities'].sum()
    })

    return metrics

In [46]:
def tes_model(contributor):
    """Fit an additive ETS model and score its forecast and upper 95% prediction bound.

    An additive error / trend / seasonal model with 24 seasonal periods is
    tried first; if building or fitting it raises ValueError, an additive
    error / trend model without seasonality is used instead. Any other
    exception propagates to the caller.

    Parameters
    ----------
    contributor : pandas.DataFrame
        Rows of a single contributor with 'contributor', 'category' and
        'n_activities' columns; rows are assumed to be sorted by time and the
        index must accept an hourly frequency.

    Returns
    -------
    pandas.Series
        Contributor name, category, r2 / mae / mse of the point forecast, the
        PGA of the point forecast ('pga') and of the upper 95% prediction
        bound ('pga_pi_upper'), and the total number of activities.
    """
    name = contributor['contributor'].iloc[0]
    print(name)

    # Positional 90/10 split: the first 90% of the rows train the model, the rest evaluate it
    cut = int(0.9 * len(contributor))
    train = contributor.iloc[:cut]
    test = contributor.iloc[cut:]

    # Set the frequency of the index to hourly
    train.index.freq = 'H'

    # Fit the model. Only ValueError triggers the fallback; the former bare
    # `except:` printed a message and left `model` undefined, which hid the real
    # error behind an UnboundLocalError (and also swallowed KeyboardInterrupt).
    try:
        model = ETSModel(train['n_activities'], error='add', trend='add', seasonal='add', seasonal_periods=24).fit(disp=False)
    except ValueError:
        model = ETSModel(train['n_activities'], error='add', trend='add').fit(disp=False)

    # Forecast the test set with a 95% prediction interval
    predictions = model.get_prediction(start=len(train), end=len(train) + len(test) - 1).summary_frame(alpha=0.05)

    # Create a series for evaluation metrics and sum of activities
    actual = test['n_activities']
    point = predictions['mean']
    metrics = pd.Series({
        'contributor': name,
        'category': contributor['category'].iloc[0],
        'r2': r2_score(actual, point),
        'mae': mean_absolute_error(actual, point),
        'mse': mean_squared_error(actual, point),
        'pga': pga_score(actual, point),
        'pga_pi_upper': pga_score(actual, predictions['pi_upper']),
        'n_activities': contributor['n_activities'].sum()
    })

    return metrics

In [None]:
# Run tes_model once per (category, contributor) group and collect the returned
# metric Series as rows; the group keys are dropped from the index
tes_results = data.groupby(['category', 'contributor']).apply(tes_model).reset_index(drop=True)

In [50]:
# Show the 40 contributors with the highest r2 for the triple exponential smoothing forecast
tes_results.sort_values(by='r2', ascending=False).head(40)

Unnamed: 0,contributor,category,r2,mae,mse,pga,pga_ci_upper,n_activities
203,pxw-bot,bot,1.0,3.353289e-14,1.3289570000000001e-27,0.0,1.0,180
181,octokit-fixture-user-a,bot,0.99998,0.008437924,8.308631e-05,0.0,1.0,300
7,Code-Inside-Bot,bot,0.997626,0.05777477,0.01264561,0.967213,1.0,300
84,dontcallmedom-bot,bot,0.988324,0.1470247,0.122183,0.121951,1.0,300
204,pytorchbot,bot,0.939099,0.2419805,0.1310049,0.8,0.955556,300
32,angular-automatic-lock-bot[bot],bot,0.857191,0.5657247,3.912475,0.108108,0.972973,300
47,ballerina-bot,bot,0.850518,0.5522556,0.5395368,0.5,1.0,300
186,openhab-bot,bot,0.769662,0.9587726,2.729252,0.4,0.933333,300
134,istio-policy-bot,bot,0.769299,0.2402489,0.2078057,0.608696,0.978261,300
342,437693,human,0.768452,1.706604,4.225032,0.666667,1.0,300


In [51]:
# Persist the triple exponential smoothing metrics (one row per contributor, no index column)
tes_results.to_csv('../eval-rest/tes_model_metrics_pi.csv', index=False)

# Long Short-Term Memory? why not