In [1]:
import os
import pandas as pd
import plotly.graph_objects as go
import numpy as np

from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.stats import mannwhitneyu

In [2]:
# Seed value. It is not referenced in the cells visible here; presumably it is
# reserved for stochastic steps elsewhere in the notebook.
random_state = 42

# Significance level of the hypothesis tests: the probability of rejecting the
# null hypothesis when it is in fact true.
alpha = 0.05

In [3]:
# Grouping of fine-grained challenge topics into higher-level ("macro") topics.
# Each list element is a single-key dict: {macro topic: [fine-grained topics]}.
# Every fine-grained topic is listed exactly once so that the structure can be
# inverted into an unambiguous topic -> macro-topic lookup.
macro_topic_ensemble_inverse = [
    # Code versioning refers to the practice of tracking changes to software code over time.
    {'Code Management': ['Code Versioning']},
    # Tasks and techniques used to organize, manipulate, store, transfer, and analyze data.
    {'Data Management': ['Artifact Management', 'Columnar Manipulation', 'CSV Manipulation', 'Data Labeling', 'Data Storage',
                         'Data Transfer', 'Data Visualization', 'Database Connectivity', 'Dataset Versioning', 'Pandas Dataframe', 'Batch Processing']},
    # Development and management of machine learning models.
    {'Model Management': ['Hyperparameter Tuning',
                          'Model Evaluation', 'Model Exporting', 'Model Registry']},
    # Management and optimization of pipelines and runs.
    {'Lifecycle Management': ['Pipeline Configuration',
                              'Pipeline Configuration (Data)', 'Pipeline Configuration (Model)', 'Run Management', 'Kubernetes Orchestration']},
    # Configuration and management of infrastructure: processing power, memory,
    # network connections, and software set up to work together efficiently.
    {'Infrastructure Management': ['Apache Spark Configuration', 'Cluster Configuration', 'Docker Configuration', 'GPU Configuration', 'VPC Networking', 'Memory Management',
                                   'Remote Configuration', 'Resource Quota Control', 'TensorFlow Configuration', 'Jupyter Notebook', 'Package Management', 'SDK Management', 'YAML Configuration']},
    # Deployment and management of machine learning models or web services.
    {'Deployment Management': ['Endpoint Serving', 'Endpoint Deployment', 'Model Serving', 'Model Inference',
                               'JSON Payload', 'Web Service', 'Serverless Serving', 'API Invocation']},
    # Monitoring and logging data in various systems.
    # NOTE(review): 'Metrics Logging' used to be listed twice here; the duplicate
    # was dropped. Confirm that no other topic was meant in its place.
    {'Report Management': ['CloudWatch Monitoring',
                           'Metrics Logging', 'TensorBoard Logging']},
    # Controlling access to information or resources in a system.
    {'Security Management': ['Account Management',
                             'Bucket Access Control', 'Role-based Access Control']},
]

In [4]:
# Folder (relative to the working directory) that receives the exported figures.
path_challenge_open_closed = 'Open vs Closed'
# Folder holding the input JSON files, two levels above the working directory.
path_general = os.path.join(os.getcwd(), os.pardir, os.pardir, 'General')

In [None]:
# Compare metrics distribution of open vs closed challenges across different topics

df = pd.read_json(os.path.join(path_general, 'logscale.json'))

# A challenge is treated as open when it has no closing timestamp; every other
# row is treated as closed.
_without_close_time = df['Challenge_closed_time'].isna()
df_open = df[_without_close_time]
df_closed = df[~_without_close_time]

def _split_violin_figure(x_open, y_open, x_closed, y_closed, grouped=True):
    """Build a split-violin figure comparing open and closed challenges.

    Open challenges are drawn in blue on the positive side of each violin,
    closed challenges in orange on the negative side, both with a visible
    mean line and 50% opacity.

    Parameters
    ----------
    x_open, y_open : array-like
        Category positions and values of the open challenges.
    x_closed, y_closed : array-like
        Category positions and values of the closed challenges.
    grouped : bool, default True
        When True, each trace also sets ``legendgroup`` and ``scalegroup`` to
        its own name ('Open' / 'Closed').

    Returns
    -------
    plotly.graph_objects.Figure
        The figure with both traces and the shared 2000x1000 layout applied.
    """
    fig = go.Figure()
    for x, y, label, color, side in (
        (x_open, y_open, 'Open', 'blue', 'positive'),
        (x_closed, y_closed, 'Closed', 'orange', 'negative'),
    ):
        grouping = dict(legendgroup=label, scalegroup=label) if grouped else {}
        fig.add_trace(
            go.Violin(
                x=x,
                y=y,
                meanline_visible=True,
                line_color=color,
                side=side,
                opacity=0.5,
                name=label,
                **grouping,
            ))
    fig.update_layout(
        height=1000,
        width=2000,
        font=dict(size=20),
        margin=dict(l=0, r=0, t=0, b=0),
    )
    return fig


def _export_metric_violin(column, file_name):
    """Plot `column` per higher-level topic (open vs closed) and save it as `file_name`.

    Reads the notebook-level `df_open` / `df_closed` frames, writes the image
    into `path_challenge_open_closed`, and returns the figure.
    """
    fig = _split_violin_figure(
        df_open['Challenge_topic_macro'], df_open[column],
        df_closed['Challenge_topic_macro'], df_closed[column],
    )
    fig.write_image(os.path.join(path_challenge_open_closed, file_name))
    return fig


# write_image does not create missing folders, so make sure the target exists.
os.makedirs(path_challenge_open_closed, exist_ok=True)

# Challenge topic count
# NOTE(review): this violin uses the (categorical) macro topic as the y value
# and a single constant x category, exactly as before - confirm that this is
# the intended way to visualise the topic counts.
_topic_count_category = 'Challenge topic count (higher level)'
fig_challenge_count = _split_violin_figure(
    np.full(len(df_open), _topic_count_category), df_open['Challenge_topic_macro'],
    np.full(len(df_closed), _topic_count_category), df_closed['Challenge_topic_macro'],
    grouped=False,
)
fig_challenge_count.write_image(os.path.join(
    path_challenge_open_closed, 'Challenge count.png'))

# One figure per metric; variable names and file names are kept unchanged.
fig_challenge_score = _export_metric_violin(
    'Challenge_score', 'Challenge score.png')
fig_challenge_favorite_count = _export_metric_violin(
    'Challenge_favorite_count', 'Challenge favorite count.png')
fig_challenge_view_count = _export_metric_violin(
    'Challenge_view_count', 'Challenge view count.png')
fig_challenge_link_count = _export_metric_violin(
    'Challenge_link_count', 'Challenge link count.png')
fig_challenge_sentence_count = _export_metric_violin(
    'Challenge_sentence_count', 'Challenge sentence count.png')
fig_challenge_word_count = _export_metric_violin(
    'Challenge_word_count', 'Challenge word count.png')
fig_challenge_unique_word_count = _export_metric_violin(
    'Challenge_unique_word_count', 'Challenge unique word count.png')
fig_challenge_information_entropy = _export_metric_violin(
    'Challenge_information_entropy', 'Challenge information entropy.png')
fig_challenge_readability = _export_metric_violin(
    'Challenge_readability', 'Challenge readability.png')
fig_challenge_answer_count = _export_metric_violin(
    'Challenge_answer_count', 'Challenge answer count.png')
fig_challenge_comment_count = _export_metric_violin(
    'Challenge_comment_count', 'Challenge comment count.png')
fig_challenge_participation_count = _export_metric_violin(
    'Challenge_participation_count', 'Challenge participation count.png')

# Metrics compared between open and closed challenges within each macro topic.
_compared_metric_columns = (
    'Challenge_score',
    'Challenge_favorite_count',
    'Challenge_view_count',
    'Challenge_link_count',
    'Challenge_sentence_count',
    'Challenge_word_count',
    'Challenge_unique_word_count',
    'Challenge_information_entropy',
    'Challenge_readability',
    'Challenge_answer_count',
    'Challenge_comment_count',
    'Challenge_participation_count',
)

# For every higher-level topic and every metric, run a Mann-Whitney U test on
# the open vs closed challenges and report the pairs whose p-value is below
# `alpha`.
for name, group in df.groupby('Challenge_topic_macro'):
    # Do not name these `open` / `closed`: a notebook-level `open` variable
    # would shadow the builtin for every later cell.
    is_open = group['Challenge_closed_time'].isna()
    open_challenges = group[is_open]
    closed_challenges = group[~is_open]

    for column in _compared_metric_columns:
        # Missing values are dropped for every metric; presumably a NaN would
        # otherwise propagate into the p-value and silently hide the result.
        open_values = open_challenges[column].dropna()
        closed_values = closed_challenges[column].dropna()
        # The test needs at least one observation on each side.
        if open_values.empty or closed_values.empty:
            continue

        _, p = mannwhitneyu(open_values, closed_values)
        if p < alpha:
            # 'Challenge_view_count' -> 'challenge view count', so the message
            # always names the metric that was actually tested.
            metric_label = column.replace('_', ' ').lower()
            print(
                f'p = {p:.2f}, indicating different distribution of open vs closed challenge regarding higher level topic {name} in {metric_label}')


In [None]:
# Compare metrics evolution of open vs closed challenges across different topics

df_challenge = pd.read_json(os.path.join(path_general, 'filtered.json'))

# One empty figure per metric for the open challenges. The number of names on
# the left must match the number of figures generated on the right.
(
    fig_challenge_topic_count_open,
    fig_challenge_view_count_open,
    fig_challenge_answer_count_open,
    fig_challenge_comment_count_open,
    fig_challenge_participation_count_open,
    fig_challenge_favorite_count_open,
    fig_challenge_score_open,
    fig_challenge_word_count_open,
    fig_challenge_unique_word_count_open,
    fig_challenge_sentence_count_open,
    fig_challenge_link_count_open,
    fig_challenge_information_entropy_open,
    fig_challenge_readability_open,
) = (go.Figure() for _ in range(13))

# ... and the matching set of figures for the closed challenges.
(
    fig_challenge_topic_count_closed,
    fig_challenge_view_count_closed,
    fig_challenge_answer_count_closed,
    fig_challenge_comment_count_closed,
    fig_challenge_participation_count_closed,
    fig_challenge_favorite_count_closed,
    fig_challenge_score_closed,
    fig_challenge_word_count_closed,
    fig_challenge_unique_word_count_closed,
    fig_challenge_sentence_count_closed,
    fig_challenge_link_count_closed,
    fig_challenge_information_entropy_closed,
    fig_challenge_readability_closed,
) = (go.Figure() for _ in range(13))

# Figures receiving the year-over-year change of the number of challenges,
# as (figure for open challenges, figure for closed challenges).
_topic_count_figures = (
    fig_challenge_topic_count_open, fig_challenge_topic_count_closed)

# Metrics plotted as the year-over-year change of their yearly total.
# Maps column -> (figure for open challenges, figure for closed challenges).
_yearly_change_figures = {
    'Challenge_participation_count': (fig_challenge_participation_count_open, fig_challenge_participation_count_closed),
    'Challenge_answer_count': (fig_challenge_answer_count_open, fig_challenge_answer_count_closed),
    'Challenge_comment_count': (fig_challenge_comment_count_open, fig_challenge_comment_count_closed),
    'Challenge_view_count': (fig_challenge_view_count_open, fig_challenge_view_count_closed),
    'Challenge_favorite_count': (fig_challenge_favorite_count_open, fig_challenge_favorite_count_closed),
    'Challenge_score': (fig_challenge_score_open, fig_challenge_score_closed),
}

# Metrics plotted as the yearly total divided by the number of challenges
# created in that year.
_yearly_average_figures = {
    'Challenge_link_count': (fig_challenge_link_count_open, fig_challenge_link_count_closed),
    'Challenge_word_count': (fig_challenge_word_count_open, fig_challenge_word_count_closed),
    'Challenge_sentence_count': (fig_challenge_sentence_count_open, fig_challenge_sentence_count_closed),
    'Challenge_unique_word_count': (fig_challenge_unique_word_count_open, fig_challenge_unique_word_count_closed),
    'Challenge_information_entropy': (fig_challenge_information_entropy_open, fig_challenge_information_entropy_closed),
    'Challenge_readability': (fig_challenge_readability_open, fig_challenge_readability_closed),
}

_summed_columns = list(_yearly_change_figures) + list(_yearly_average_figures)


def _year_over_year_change(values):
    """Return the difference between consecutive yearly values.

    The first year has no predecessor and is reported as 0, so the result has
    the same length as `values`. An empty input is returned as an empty array,
    which keeps x and y the same length for subsets without any challenge.
    """
    values = np.asarray(values)
    if values.size == 0:
        return values
    return np.insert(np.diff(values), 0, 0)


for name, group in df_challenge.groupby('Challenge_topic_macro'):
    # Do not name these `open` / `closed`: a notebook-level `open` variable
    # would shadow the builtin for every later cell.
    is_open = group['Challenge_closed_time'].isna()

    # Position 0 -> open challenges, position 1 -> closed challenges; this
    # matches the order of the figure tuples defined above.
    for status, rows in enumerate((group[is_open], group[~is_open])):
        # NOTE: pandas >= 2.2 deprecates the 'Y' alias in favour of 'YE'; 'Y'
        # is kept here for compatibility with older pandas versions.
        yearly = rows.groupby(
            pd.Grouper(key='Challenge_created_time', freq='Y'))

        # Keep the per-year challenge count separate from the per-year sums:
        # the summed frame has no 'Challenge_topic_macro' column, so the
        # averages below must divide by this series.
        challenge_count = yearly['Challenge_topic_macro'].count()
        totals = yearly[_summed_columns].sum()
        x = pd.to_datetime(challenge_count.index).values

        # plot challenge topic count over time
        _topic_count_figures[status].add_trace(
            go.Scatter(x=x, y=_year_over_year_change(challenge_count.values), mode='lines', name=name))

        # plot the year-over-year change of the summed engagement metrics
        for column, figures in _yearly_change_figures.items():
            figures[status].add_trace(
                go.Scatter(x=x, y=_year_over_year_change(totals[column].values), mode='lines', name=name))

        # plot the per-challenge average of the content metrics; years without
        # any challenge give 0 / 0 = NaN and show up as gaps in the line
        for column, figures in _yearly_average_figures.items():
            y = (totals[column] / challenge_count).values
            figures[status].add_trace(
                go.Scatter(x=x, y=y, mode='lines', name=name))

# Give every "open" challenge figure the same fixed canvas (2000x1000 px) with
# all margins set to zero. The figures are listed once and configured in a
# single loop so the export size only has to be changed in one place.
for _fig_open in (
    fig_challenge_topic_count_open,
    fig_challenge_view_count_open,
    fig_challenge_answer_count_open,
    fig_challenge_comment_count_open,
    fig_challenge_participation_count_open,
    fig_challenge_favorite_count_open,
    fig_challenge_score_open,
    fig_challenge_word_count_open,
    fig_challenge_unique_word_count_open,
    fig_challenge_sentence_count_open,
    fig_challenge_link_count_open,
    fig_challenge_information_entropy_open,
    fig_challenge_readability_open,
):
    _fig_open.update_layout(
        width=2000,
        height=1000,
        margin=dict(l=0, r=0, t=0, b=0))

# Give every "closed" challenge figure the same fixed canvas (2000x1000 px)
# with all margins set to zero. The figures are listed once and configured in
# a single loop so the export size only has to be changed in one place.
for _fig_closed in (
    fig_challenge_topic_count_closed,
    fig_challenge_view_count_closed,
    fig_challenge_answer_count_closed,
    fig_challenge_comment_count_closed,
    fig_challenge_participation_count_closed,
    fig_challenge_favorite_count_closed,
    fig_challenge_score_closed,
    fig_challenge_word_count_closed,
    fig_challenge_unique_word_count_closed,
    fig_challenge_sentence_count_closed,
    fig_challenge_link_count_closed,
    fig_challenge_information_entropy_closed,
    fig_challenge_readability_closed,
):
    _fig_closed.update_layout(
        width=2000,
        height=1000,
        margin=dict(l=0, r=0, t=0, b=0))

# Save every "open" challenge figure as a PNG inside path_challenge_open_closed.
# Each figure is paired with its output file name; the names (including which
# ones carry the "_increase_rate" suffix) and the write order are unchanged.
# The file names are plain string literals: they contain no placeholders, so
# an f-string prefix is not needed.
for _fig_open, _png_name_open in (
    (fig_challenge_topic_count_open,
     'Challenge_topic_count_increase_rate (Open).png'),
    (fig_challenge_view_count_open,
     'Challenge_view_count_increase_rate (Open).png'),
    (fig_challenge_answer_count_open,
     'Challenge_answer_count_increase_rate (Open).png'),
    (fig_challenge_comment_count_open,
     'Challenge_comment_count_increase_rate (Open).png'),
    (fig_challenge_participation_count_open,
     'Challenge_participation_count_increase_rate (Open).png'),
    (fig_challenge_favorite_count_open,
     'Challenge_favorite_count_increase_rate (Open).png'),
    (fig_challenge_score_open,
     'Challenge_score_increase_rate (Open).png'),
    (fig_challenge_link_count_open,
     'Challenge_link_count (Open).png'),
    (fig_challenge_word_count_open,
     'Challenge_word_count (Open).png'),
    (fig_challenge_unique_word_count_open,
     'Challenge_unique_word_count (Open).png'),
    (fig_challenge_sentence_count_open,
     'Challenge_sentence_count (Open).png'),
    (fig_challenge_information_entropy_open,
     'Challenge_information_entropy (Open).png'),
    (fig_challenge_readability_open,
     'Challenge_readability (Open).png'),
):
    _fig_open.write_image(
        os.path.join(path_challenge_open_closed, _png_name_open))

# Save every "closed" challenge figure as a PNG inside
# path_challenge_open_closed. Each figure is paired with its output file name;
# the names (including which ones carry the "_increase_rate" suffix) and the
# write order are unchanged. The file names are plain string literals: they
# contain no placeholders, so an f-string prefix is not needed.
for _fig_closed, _png_name_closed in (
    (fig_challenge_topic_count_closed,
     'Challenge_topic_count_increase_rate (Closed).png'),
    (fig_challenge_view_count_closed,
     'Challenge_view_count_increase_rate (Closed).png'),
    (fig_challenge_answer_count_closed,
     'Challenge_answer_count_increase_rate (Closed).png'),
    (fig_challenge_comment_count_closed,
     'Challenge_comment_count_increase_rate (Closed).png'),
    (fig_challenge_participation_count_closed,
     'Challenge_participation_count_increase_rate (Closed).png'),
    (fig_challenge_favorite_count_closed,
     'Challenge_favorite_count_increase_rate (Closed).png'),
    (fig_challenge_score_closed,
     'Challenge_score_increase_rate (Closed).png'),
    (fig_challenge_link_count_closed,
     'Challenge_link_count (Closed).png'),
    (fig_challenge_word_count_closed,
     'Challenge_word_count (Closed).png'),
    (fig_challenge_unique_word_count_closed,
     'Challenge_unique_word_count (Closed).png'),
    (fig_challenge_sentence_count_closed,
     'Challenge_sentence_count (Closed).png'),
    (fig_challenge_information_entropy_closed,
     'Challenge_information_entropy (Closed).png'),
    (fig_challenge_readability_closed,
     'Challenge_readability (Closed).png'),
):
    _fig_closed.write_image(
        os.path.join(path_challenge_open_closed, _png_name_closed))