# Random Forest

This script goes through all the random forest stuff.

### Data loading

In [14]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import json

from python_proj.utils.util import safe_save_fig, subtract_dict, Counter

file_name = "dataset_all_days_started_30_06_23"
# base_path = '/workspaces/msc_thesis/data/'
base_path= '/data/s4509412/data/data/'
data_path = f'{base_path}/final_data/{file_name}.csv'
figure_base_path = f"{base_path}/figures/modelling/{file_name}/"

pr_merged_key = 'PullRequestIsMerged'
ftc_key = 'SubmitterIsFirstTimeContributor'

df: pd.DataFrame = pd.read_csv(filepath_or_buffer=data_path, header=0)
# df = df.sample(math.floor(len(df) * 0.1))
ftc_df = df[df[ftc_key]]
print(f'{len(df)=}')
print(f'{len(ftc_df)=}')


dependent = df[pr_merged_key]
ftc_dependent = ftc_df[pr_merged_key]
dropped_fields = [
    # Meta fields
    'ID', 'Project Name', 'Submitter ID', 'PR Number', 'Closed At',

    # dependent and control field
    pr_merged_key, ftc_key,

    # Useless fields because they are (almost) all 0
    "DependencyEcosystemExperienceSubmitterIssueCommentCount",
    "DependencyEcosystemExperienceSubmitterIssueSubmissionCount",
    'DependencyEcosystemExperienceSubmitterPullRequestSuccessRate',
    "DependencyEcosystemExperienceSubmitterPullRequestCommentCount",
    "DependencyEcosystemExperienceSubmitterPullRequestSubmissionCount",

    "IntraProjectSubmitterIssueCommentCount",
    "IntraProjectSubmitterIssueSubmissionCount",

    "InversedDependencyEcosystemExperienceSubmitterIssueCommentCount",
    "InversedDependencyEcosystemExperienceSubmitterIssueSubmissionCount",
    'InversedDependencyEcosystemExperienceSubmitterPullRequestSuccessRate',
    "InversedDependencyEcosystemExperienceSubmitterPullRequestCommentCount",
    "InversedDependencyEcosystemExperienceSubmitterPullRequestSubmissionCount",

    "SharedExperienceIssueDiscussionParticipationByIntegratorAndSubmitter",
    'SharedExperienceIssueSubmittedByIntegratorCommentedOnBySubmitter',
    "SharedExperienceIssueSubmittedBySubmitterCommentedOnByIntegrator",

    "SharedExperiencePullRequestDiscussionParticipationByIntegratorAndSubmitter",
    "SharedExperiencePullRequestSubmittedByIntegratorCommentedOnBySubmitter",
    "SharedExperiencePullRequestSubmittedBySubmitterCommentedOnByIntegrator",
]
independent = df
ftc_independent = ftc_df
for field in dropped_fields:
    independent = independent.drop(field, axis=1)
    ftc_independent = ftc_independent.drop(field, axis=1)

# These fields are zero by definition of "first-time contributor".
ftc_dropped_fields = [
    'IntraProjectSubmitterPullRequestSubmissionCount',
    'IntraProjectSubmitterPullRequestSuccessRate'
]
for field in ftc_dropped_fields:
    ftc_independent = ftc_independent.drop(field, axis=1)

print(f'{len(independent.columns)=}')
print(f'{independent.columns=}')

print(f'{len(ftc_independent.columns)=}')
print(f'{ftc_independent.columns=}')

seed_counter = Counter()

len(df)=1829971
len(ftc_df)=536234
len(independent.columns)=17
independent.columns=Index(['ControlIntegratedBySameUser', 'ControlPullRequestLifeTimeInMinutes',
       'ControlPullRequestHasComments', 'ControlNumberOfCommitsInPullRequest',
       'ControlPullRequestHasCommentByExternalUser',
       'ControlHasHashTagInDescription',
       'ControlIntraProjectPullRequestExperienceOfIntegrator',
       'IntraProjectSubmitterPullRequestSubmissionCount',
       'IntraProjectSubmitterPullRequestSuccessRate',
       'IntraProjectSubmitterPullRequestCommentCount',
       'EcosystemExperienceSubmitterPullRequestSuccessRate',
       'EcosystemExperienceSubmitterPullRequestSubmissionCount',
       'EcosystemExperienceSubmitterPullRequestCommentCount',
       'SharedExperiencePullRequestSubmittedBySubmitterIntegratedByIntegrator',
       'SharedExperiencePullRequestSubmittedByIntegratorIntegratedBySubmitter',
       'EcosystemExperienceSubmitterIssueSubmissionCount',
       'EcosystemExperienceSub

## Regular Model

### Train test data creation

In [15]:
from sklearn.model_selection import train_test_split

def calculate_class_imbalance():
    # Calculates class imbalance.
    class_counts = df[pr_merged_key].value_counts()
    class_imbalance = max(class_counts) / min(class_counts)
    print(f'{class_counts=}')
    print(f'{class_imbalance=}\n')
    return class_imbalance


class_imbalance = calculate_class_imbalance()


def is_majority_class(label) -> bool:
    """
    Helper method for standardizing majority 
    class filtering later in the notebook.
    """

    # NOTE: You have to manually change this
    # if the majority class is different.
    return label


def get_imbalance_weights(__df):
    return list([1.0 if is_majority_class(label) else class_imbalance
                 for label in __df])


# Makes 80:20 dataset splits for testing/training
train, test = train_test_split(df, test_size=0.2)
print(f'{len(train)=}, {len(test)=}\n')

# Labels
train_labels = train[pr_merged_key]
test_labels = test[pr_merged_key]

# Imbalance weights
train_imbalance_weights = get_imbalance_weights(train_labels)
test_imbalance_weights = get_imbalance_weights(test_labels)

# Predictors
train_predictors = train[independent.columns]
test_predictors = test[independent.columns]

print(f'{len(independent.columns)=}')
print(f'{independent.columns=}')

independent.describe()


class_counts=PullRequestIsMerged
True     1407650
False     422321
Name: count, dtype: int64
class_imbalance=3.3331281181849826

len(train)=1463976, len(test)=365995

len(independent.columns)=17
independent.columns=Index(['ControlIntegratedBySameUser', 'ControlPullRequestLifeTimeInMinutes',
       'ControlPullRequestHasComments', 'ControlNumberOfCommitsInPullRequest',
       'ControlPullRequestHasCommentByExternalUser',
       'ControlHasHashTagInDescription',
       'ControlIntraProjectPullRequestExperienceOfIntegrator',
       'IntraProjectSubmitterPullRequestSubmissionCount',
       'IntraProjectSubmitterPullRequestSuccessRate',
       'IntraProjectSubmitterPullRequestCommentCount',
       'EcosystemExperienceSubmitterPullRequestSuccessRate',
       'EcosystemExperienceSubmitterPullRequestSubmissionCount',
       'EcosystemExperienceSubmitterPullRequestCommentCount',
       'SharedExperiencePullRequestSubmittedBySubmitterIntegratedByIntegrator',
       'SharedExperiencePullRequestSu

Unnamed: 0,ControlPullRequestLifeTimeInMinutes,ControlNumberOfCommitsInPullRequest,ControlIntraProjectPullRequestExperienceOfIntegrator,IntraProjectSubmitterPullRequestSubmissionCount,IntraProjectSubmitterPullRequestSuccessRate,IntraProjectSubmitterPullRequestCommentCount,EcosystemExperienceSubmitterPullRequestSuccessRate,EcosystemExperienceSubmitterPullRequestSubmissionCount,EcosystemExperienceSubmitterPullRequestCommentCount,SharedExperiencePullRequestSubmittedBySubmitterIntegratedByIntegrator,SharedExperiencePullRequestSubmittedByIntegratorIntegratedBySubmitter,EcosystemExperienceSubmitterIssueSubmissionCount,EcosystemExperienceSubmitterIssueCommentCount
count,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0,1829971.0
mean,31892.93,4.932661,1543.313,70.95219,0.5792611,116.8831,0.5057427,76.02197,138.6587,9.593894,1.851191,40.583,313.178
std,140072.9,72.57314,4083.37,224.0303,0.4452852,418.7254,0.412624,307.3675,540.3468,55.06976,16.07797,101.1044,951.5315
min,0.01666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,53.3,1.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
50%,986.9167,1.0,153.0,5.0,0.8571429,3.0,0.65625,4.0,4.0,0.0,0.0,6.0,23.0
75%,7351.4,3.0,852.0,49.0,0.9758065,47.0,0.9,34.0,50.0,1.0,0.0,34.0,181.0
max,4489490.0,10000.0,31492.0,4747.0,1.0,13895.0,1.0,6890.0,13909.0,1789.0,865.0,2713.0,21803.0


### Full Model Creation

In [16]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import f1_score, confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, roc_auc_score


def calculate_metrics(predicted_labels, true_labels, sample_weights=None):
    return {
        "accuracy": accuracy_score(true_labels, predicted_labels, sample_weight=sample_weights),
        "precision": precision_score(true_labels, predicted_labels, sample_weight=sample_weights),
        "recall": recall_score(true_labels, predicted_labels, sample_weight=sample_weights),
        "f1": f1_score(true_labels, predicted_labels, sample_weight=sample_weights)
    }


def create_model(predictor_labels, is_weighted=False):
    # Model creation and predictions.
    random_state = seed_counter.get_next()
    print(f'{random_state=}')
    weighted_rf = RandomForestRegressor(
        n_estimators=100,
        random_state=random_state)

    r_train_predictors = train_predictors[predictor_labels]
    r_test_preditors = test_predictors[predictor_labels]

    if is_weighted:
        print("Training weighted model")
        weighted_rf.fit(r_train_predictors, train_labels,
                        train_imbalance_weights)
    else:
        print("Training unweighted model")
        weighted_rf.fit(r_train_predictors, train_labels)

    predictions = weighted_rf.predict(r_test_preditors)
    predictions = [pred >= 0.5 for pred in predictions]

    # F1 scores.
    conf = confusion_matrix(test_labels, predictions)

    unweighted_metrics = calculate_metrics(predictions, test_labels)
    weighted_metricss = calculate_metrics(
        predictions, test_labels, test_imbalance_weights)

    print(f'{unweighted_metrics=}')
    print(f'{weighted_metricss=}')

    # Other metrics.
    print(f'{conf=}')
    print(classification_report(
        test_labels, predictions,
        sample_weight=test_imbalance_weights
    ))

    return weighted_rf, weighted_metricss, unweighted_metrics

# These are more or less equally good.

unweighted_rf, unweighted_weighted_f1, unweighted_unweighted_f1 = create_model(
    train_predictors.columns, False)

weighted_rf,  weighted_weighted_f1, weighted_unweighted_f1 = create_model(
    train_predictors.columns, True)

diff_weighted_f1s = subtract_dict(
    unweighted_weighted_f1, weighted_weighted_f1)

diff_unweighted_f1s = subtract_dict(
    unweighted_unweighted_f1, weighted_unweighted_f1)

print("Comparison model with weighted training vs. unweighted training (negative means weighted is better):")
print(f'Unweighted metrics: \n{json.dumps(diff_weighted_f1s, indent=4)}\n')
print(f'Weighted metrics: \n{json.dumps(diff_unweighted_f1s, indent=4)}\n')

# As the difference in performance is negligable,
# the unweighted model is picked for comparison.
full_weighted_f1 = unweighted_weighted_f1
full_unweighted_f1 = unweighted_unweighted_f1


random_state=43
Training unweighted model


KeyboardInterrupt: 

### Control Model Creation

Creates a random forest model using the different control variables AND intra-project experience.

In [None]:
controls = ['ControlIntegratedBySameUser',
            'ControlPullRequestLifeTimeInMinutes',
            'ControlPullRequestHasComments',
            'ControlNumberOfCommitsInPullRequest',
            'ControlPullRequestHasCommentByExternalUser',
            'ControlHasHashTagInDescription',
            'ControlIntraProjectPullRequestExperienceOfIntegrator',
            'IntraProjectSubmitterPullRequestSubmissionCount',
            'IntraProjectSubmitterPullRequestSuccessRate',
            'IntraProjectSubmitterPullRequestCommentCount']


# unweigthed model: weighted_f1=0.83 and unweighted_f1=0.93
control_model, control_weighted_f1, control_unweighted_f1 = create_model(
    controls, False)


diff_weighted_full_vs_control = subtract_dict(
    full_weighted_f1, control_weighted_f1)
diff_unweighted_full_vs_control = subtract_dict(
    full_unweighted_f1, control_unweighted_f1)


print("Comparison full model vs. control model: (negatives means the control is better)")
print(f'Unweighted metrics: \n{json.dumps(diff_unweighted_full_vs_control, indent=4)}\n')
print(f'Weighted metrics: \n{json.dumps(diff_weighted_full_vs_control, indent=4)}\n')


random_state=45
Training unweighted model
unweighted_metrics={'accuracy': 0.8636612021857923, 'precision': 0.890261138780455, 'recall': 0.9386531365313653, 'f1': 0.9138169257340242}
weighted_metricss={'accuracy': 0.7765032910202875, 'precision': 0.7101293191586665, 'recall': 0.9386531365313653, 'f1': 0.8085543492664121}
conf=array([[ 5155,  3261],
       [ 1729, 26455]])
              precision    recall  f1-score   support

       False       0.91      0.61      0.73 27869.523324854028
        True       0.71      0.94      0.81   28184.0

    accuracy                           0.78 56053.52332485403
   macro avg       0.81      0.78      0.77 56053.52332485403
weighted avg       0.81      0.78      0.77 56053.52332485403

Comparison full model vs. control model: (negatives means the control is better)
Unweighted F1s: 
{
    "precision": 0.0038069412034946737,
    "recall": 0.010041158103888725,
    "f1": 0.0067546012069295,
    "accuracy": 0.01027322404371589
}

Weighted F1s: 
{
    

### Calculate feature importance through exclusion.

In [None]:
def test_importance_through_exclusion(full_weighted_metrics: dict, 
                                      full_unweighted_metrics: dict,
                                      _independent: dict):
    f1_differences_per_feature = {}

    for feature in _independent.columns:
        features_excl = _independent.columns.drop(feature)

        rf, feature_weighted_f1, feature_unweighted_f1 = create_model(
            features_excl, False)

        diff_weighted_full_vs_feature_excl = subtract_dict(
            full_weighted_metrics, feature_weighted_f1)
        diff_unweighted_full_vs_feature_excl = subtract_dict(
            full_unweighted_metrics, feature_unweighted_f1)

        f1_differences_per_feature[feature] = {
            'Weighted metrics': feature_weighted_f1,
            'Unweighted metrics': feature_unweighted_f1,
            'Weighted metrics difference': diff_weighted_full_vs_feature_excl,
            'Unweighted metrics difference': diff_unweighted_full_vs_feature_excl
        }

    return f1_differences_per_feature


diffs_per_feature = test_importance_through_exclusion(full_weighted_f1, full_unweighted_f1, independent)
print("Comparison full model vs. models with single features excluded: (negatives means the model with excluded features is better)")
print(f'{json.dumps(diffs_per_feature, indent=4)}')


random_state=46
Training unweighted model
unweighted_metrics={'accuracy': 0.8518306010928962, 'precision': 0.8782279238259829, 'recall': 0.937588702810105, 'f1': 0.9069380330513274}
weighted_metricss={'accuracy': 0.7521599221536476, 'precision': 0.6853256868578123, 'recall': 0.937588702810105, 'f1': 0.7918515305972985}
conf=array([[ 4752,  3664],
       [ 1759, 26425]])
              precision    recall  f1-score   support

       False       0.90      0.56      0.69 27869.523324854028
        True       0.69      0.94      0.79   28184.0

    accuracy                           0.75 56053.52332485403
   macro avg       0.79      0.75      0.74 56053.52332485403
weighted avg       0.79      0.75      0.74 56053.52332485403

random_state=47
Training unweighted model
unweighted_metrics={'accuracy': 0.8462568306010929, 'precision': 0.8708690934201441, 'recall': 0.9396820891285836, 'f1': 0.9039679153511392}
weighted_metricss={'accuracy': 0.7376751483920374, 'precision': 0.6706808214081312, 

### Partial Dependence Plots

Generates partial dependence plots of the full model using the test data.

In [None]:
from sklearn.inspection import PartialDependenceDisplay
from matplotlib import pyplot as plt


def create_partial_dependence_plots(
        __rf, __df,
        __used_predictor_labels,
        model_name: str = "",
        average_only=True):
    
    if average_only:
        # Collective partial dependence plot.
        PartialDependenceDisplay.from_estimator(
            __rf, __df, __used_predictor_labels)

        fig = plt.gcf()
        axs = fig.axes

        lines = []
        for ax in axs:
            __lines = list([(line.get_xdata(), line.get_ydata())
                            for line in ax.lines])
            lines.extend(__lines)

        for (x, y), label in zip(lines, __used_predictor_labels):
            plt.clf()
            plt.plot(x, y, linestyle='-', color='#e69d00')
            plt.xlabel(label)
            plt.ylabel('Partial Dependence')
            plt.tight_layout()
            output_path = f"{figure_base_path}/partial-dependence/{model_name}/{label}.png"
            safe_save_fig(output_path)

    else:
        # TODO: create a plot where the average response is displayed
        # as a line, and the min/max as a grey area around it.
        pdd = PartialDependenceDisplay.from_estimator(
            __rf, __df, __used_predictor_labels, kind='individual')


create_partial_dependence_plots(weighted_rf,
                                test_predictors, independent.columns,
                                model_name="weighted")

create_partial_dependence_plots(unweighted_rf,
                                test_predictors, independent.columns,
                                model_name="unweighted")


## First-time contributor Control

### Generate Train and Test data

In [None]:
# NOTE: this is more-or-less copied from above.

from sklearn.model_selection import train_test_split

def calculate_class_imbalance():
    # Calculates class imbalance.
    class_counts = ftc_df[pr_merged_key].value_counts()
    class_imbalance = max(class_counts) / min(class_counts)
    print(f'{class_counts=}')
    print(f'{class_imbalance=}\n')
    return class_imbalance


class_imbalance = calculate_class_imbalance()


def is_majority_class(label) -> bool:
    """
    Helper method for standardizing majority 
    class filtering later in the notebook.
    """

    # NOTE: You have to manually change this
    # if the majority class is different.
    return label


def get_imbalance_weights(__df):
    return list([1.0 if is_majority_class(label) else class_imbalance
                 for label in __df])


# Makes 80:20 dataset splits for testing/training
train, test = train_test_split(ftc_df, test_size=0.2)
print(f'{len(train)=}, {len(test)=}\n')

# Labels
train_labels = train[pr_merged_key]
test_labels = test[pr_merged_key]

# Imbalance weights
train_imbalance_weights = get_imbalance_weights(train_labels)
test_imbalance_weights = get_imbalance_weights(test_labels)

# Predictors
train_predictors = train[ftc_independent.columns]
test_predictors = test[ftc_independent.columns]

print(f'{len(ftc_independent.columns)=}')
print(f'{ftc_independent.columns=}')

ftc_independent.describe()


class_counts=PullRequestIsMerged
True     33756
False    19763
Name: count, dtype: int64
class_imbalance=1.7080402772858372

len(train)=42815, len(test)=10704

len(ftc_independent.columns)=15
ftc_independent.columns=Index(['ControlIntegratedBySameUser', 'ControlPullRequestLifeTimeInMinutes',
       'ControlPullRequestHasComments', 'ControlNumberOfCommitsInPullRequest',
       'ControlPullRequestHasCommentByExternalUser',
       'ControlHasHashTagInDescription',
       'ControlIntraProjectPullRequestExperienceOfIntegrator',
       'IntraProjectSubmitterPullRequestCommentCount',
       'EcosystemExperienceSubmitterPullRequestSuccessRate',
       'EcosystemExperienceSubmitterPullRequestSubmissionCount',
       'EcosystemExperienceSubmitterPullRequestCommentCount',
       'SharedExperiencePullRequestSubmittedBySubmitterIntegratedByIntegrator',
       'SharedExperiencePullRequestSubmittedByIntegratorIntegratedBySubmitter',
       'EcosystemExperienceSubmitterIssueSubmissionCount',
       'E

Unnamed: 0,ControlPullRequestLifeTimeInMinutes,ControlNumberOfCommitsInPullRequest,ControlIntraProjectPullRequestExperienceOfIntegrator,IntraProjectSubmitterPullRequestCommentCount,EcosystemExperienceSubmitterPullRequestSuccessRate,EcosystemExperienceSubmitterPullRequestSubmissionCount,EcosystemExperienceSubmitterPullRequestCommentCount,SharedExperiencePullRequestSubmittedBySubmitterIntegratedByIntegrator,SharedExperiencePullRequestSubmittedByIntegratorIntegratedBySubmitter,EcosystemExperienceSubmitterIssueSubmissionCount,EcosystemExperienceSubmitterIssueCommentCount
count,53519.0,53519.0,53519.0,53519.0,53519.0,53519.0,53519.0,53519.0,53519.0,53519.0,53519.0
mean,61256.08,5.333937,835.016985,0.356901,0.381246,21.63555,51.662905,0.529625,0.144472,13.654534,84.685813
std,200677.7,102.08998,3101.345735,8.014358,0.410114,102.0607,299.183814,7.515226,3.385247,52.350587,422.962656
min,0.01666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,170.2583,1.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2069.183,1.0,40.0,0.0,0.177419,1.0,1.0,0.0,0.0,1.0,3.0
75%,21886.77,2.0,237.0,0.0,0.8,7.0,9.0,0.0,0.0,7.0,20.0
max,3820108.0,10000.0,31481.0,1569.0,1.0,5885.0,13909.0,738.0,324.0,1926.0,11563.0


### Full Model

In [None]:

ftc_unweighted_rf, ftc_unweighted_weighted_metrics, ftc_unweighted_unweighted_metrics = create_model(
    train_predictors.columns, False)

ftc_weighted_rf,  ftc_weighted_weighted_metrics, ftc_weighted_unweighted_metrics = create_model(
    train_predictors.columns, True)

ftc_diff_weighted = subtract_dict(
    ftc_unweighted_weighted_metrics, ftc_weighted_weighted_metrics)
ftc_diff_unweighted = subtract_dict(
    ftc_unweighted_unweighted_metrics, ftc_weighted_unweighted_metrics)

print("Comparison first-time contributor model with weighted training vs. unweighted training (negative means weighted is better):")
print(f'Unweighted metrics: \n{json.dumps(ftc_diff_weighted, indent=4)}\n')
print(f'Weighted metrics: \n{json.dumps(ftc_diff_unweighted, indent=4)}\n')

ftc_full_weighted_metrics = ftc_unweighted_weighted_metrics
ftc_full_unweighted_metrics = ftc_unweighted_unweighted_metrics


Comparison first-time contributor model with weighted training vs. unweighted training (negative means weighted is better):
Unweighted metrics: 
{
    "precision": 0.005486325994504715,
    "recall": -0.0031282586027111536,
    "f1": 0.002323946539917965,
    "accuracy": 0.0045075303429871205
}

Weighted metrics: 
{
    "precision": 0.004315128113100863,
    "recall": -0.0031282586027111536,
    "f1": 0.001023052263134927,
    "accuracy": 0.002522421524663754
}



### Control Model

In [None]:
controls = ['ControlIntegratedBySameUser',
            'ControlPullRequestLifeTimeInMinutes',
            'ControlPullRequestHasComments',
            'ControlNumberOfCommitsInPullRequest',
            'ControlPullRequestHasCommentByExternalUser',
            'ControlHasHashTagInDescription',
            'ControlIntraProjectPullRequestExperienceOfIntegrator',
            'IntraProjectSubmitterPullRequestCommentCount']


ftc_control_model, ftc_control_weighted_metrics, ftc_control_unweighted_metrics = create_model(
    controls, False)

ftc_diff_unweighted_full_vs_control = subtract_dict(
    ftc_full_unweighted_metrics, ftc_control_unweighted_metrics)
ftc_diff_weighted_full_vs_control = subtract_dict(
    ftc_full_weighted_metrics, ftc_control_weighted_metrics)

print("First-time contributor control model metrics:")
print(
    f'Unweighted metrics: {json.dumps(ftc_control_unweighted_metrics, indent=4)=}\n')
print(
    f'Weighted metrics: {json.dumps(ftc_control_weighted_metrics, indent=4)=}\n')

print("Comparison first-time contributor full model vs. control model: (negatives means the control is better)")
print(
    f'Difference unweighted metrics: \n{json.dumps(ftc_diff_unweighted_full_vs_control, indent=4)}\n')
print(
    f'Difference weighted metrics: \n{json.dumps(ftc_diff_weighted_full_vs_control, indent=4)}\n')


random_state=67
Training unweighted model
unweighted_metrics={'accuracy': 0.7391629297458894, 'precision': 0.7678644623582457, 'recall': 0.8371815879636526, 'f1': 0.8010262257696693}
weighted_metricss={'accuracy': 0.7047285433707564, 'precision': 0.659472670101172, 'recall': 0.8371815879636526, 'f1': 0.7377767766989725}
conf=array([[2292, 1699],
       [1093, 5620]])
              precision    recall  f1-score   support

       False       0.78      0.57      0.66 6816.788746648427
        True       0.66      0.84      0.74    6713.0

    accuracy                           0.70 13529.788746648428
   macro avg       0.72      0.71      0.70 13529.788746648428
weighted avg       0.72      0.70      0.70 13529.788746648428

Comparison first-time contributor full model vs. control model: (negatives means the control is better)
Unweighted metrics: 
{
    "precision": 0.009646542043515005,
    "recall": 0.031133621331744354,
    "f1": 0.01938193749563666,
    "accuracy": 0.02242152466367719

### Feature Importance through exclusion

In [None]:
diffs_per_feature = test_importance_through_exclusion(ftc_full_weighted_metrics, ftc_full_unweighted_metrics, ftc_independent)
print("Performance scores of models when excluding a feature and a comparison with the full model (negative scores means the model with an excluded feature is better).")
print(f'{json.dumps(diffs_per_feature, indent=4)}')

random_state=68
Training unweighted model
unweighted_metrics={'accuracy': 0.6914237668161435, 'precision': 0.7188703465982028, 'recall': 0.8342022940563086, 'f1': 0.7722540164103978}
weighted_metricss={'accuracy': 0.6412650412994406, 'precision': 0.59953269377571, 'recall': 0.8342022940563086, 'f1': 0.6976624728475312}
conf=array([[1801, 2190],
       [1113, 5600]])
              precision    recall  f1-score   support

       False       0.73      0.45      0.56 6816.788746648427
        True       0.60      0.83      0.70    6713.0

    accuracy                           0.64 13529.788746648428
   macro avg       0.67      0.64      0.63 13529.788746648428
weighted avg       0.67      0.64      0.63 13529.788746648428

random_state=69
Training unweighted model
unweighted_metrics={'accuracy': 0.7022608370702541, 'precision': 0.7290205248116394, 'recall': 0.835989870400715, 'f1': 0.7788494899729372}
weighted_metricss={'accuracy': 0.6552812386243776, 'precision': 0.6116639907711877, 're

### Partial dependence plots

In [None]:
create_partial_dependence_plots(ftc_weighted_rf,
                                test_predictors, ftc_independent.columns,
                                model_name="ftc_weighted")

create_partial_dependence_plots(ftc_unweighted_rf,
                                test_predictors, ftc_independent.columns,
                                model_name="ftc_unweighted")