# Engagement Metrics

In [1]:
from IPython.display import Markdown, display
import config
import pandas as pd

# Load the interim sparse event data from the CSV location configured in `config`.
event_data_path = config.SPARSE_EVENT_DATA_INTERIM
sparse_event_data_df = pd.read_csv(event_data_path)

In [2]:
# Exclude records without a member number; they cannot be attributed to a member.
sparse_event_data_df = sparse_event_data_df[sparse_event_data_df["MEMBER_NUMBER"].notnull()]

# Parse the timestamps BEFORE aggregating. If the column still holds strings
# (e.g. as loaded from CSV), "max" compares them lexicographically, which only
# matches chronological order for ISO-8601-like formats.
member_events_df = sparse_event_data_df.assign(
    EVENT_TIMESTAMP=pd.to_datetime(sparse_event_data_df["EVENT_TIMESTAMP"])
)

# Per member and event category:
#   EVENT_COUNT     - number of events with a non-null timestamp
#   EVENT_TIMESTAMP - timestamp of the most recent event
# Named aggregation followed by reset_index() already produces the columns
# MEMBER_NUMBER, FIRST_NAME, LAST_NAME, PRIMARY_EMAIL, EVENT_CATEGORY,
# EVENT_COUNT, EVENT_TIMESTAMP in that order, so no manual renaming is needed.
# NOTE(review): groupby's default dropna=True silently drops rows where ANY
# grouping key (FIRST_NAME, LAST_NAME, PRIMARY_EMAIL, EVENT_CATEGORY) is
# missing - confirm this is intended.
engagement_metrics_df = (
    member_events_df.groupby(
        ["MEMBER_NUMBER", "FIRST_NAME", "LAST_NAME", "PRIMARY_EMAIL", "EVENT_CATEGORY"]
    )
    .agg(EVENT_COUNT=("EVENT_TIMESTAMP", "count"), EVENT_TIMESTAMP=("EVENT_TIMESTAMP", "max"))
    .reset_index()
)

# Normalize the event counts relative to the largest per-group count.
# When the maximum is 0 or NaN (no counted events / empty frame) divide by 1
# instead, so the column stays 0 / NaN rather than becoming 0/0 = NaN.
max_event_count = engagement_metrics_df["EVENT_COUNT"].max()
event_count_scale = max_event_count if max_event_count > 0 else 1
engagement_metrics_df["EVENT_COUNT"] = engagement_metrics_df["EVENT_COUNT"] / event_count_scale

# Recency: whole days between each group's latest event and the latest event overall.
most_recent_event_timestamp = engagement_metrics_df["EVENT_TIMESTAMP"].max()
engagement_metrics_df["EVENT_RECENCY"] = (
    most_recent_event_timestamp - engagement_metrics_df["EVENT_TIMESTAMP"]
).dt.days

# Invert and scale so that 1 means "most recent" and 0 means "least recent".
# If every group's latest event falls on the same day the maximum recency is 0;
# dividing by 1 then yields a normalized recency of 1 for all groups instead of NaN.
max_event_recency = engagement_metrics_df["EVENT_RECENCY"].max()
event_recency_scale = max_event_recency if max_event_recency > 0 else 1
engagement_metrics_df["NORMALIZED_EVENT_RECENCY"] = 1 - (
    engagement_metrics_df["EVENT_RECENCY"] / event_recency_scale
)

# Relative weight of each normalized metric in the engagement score.
weights = {
    "EVENT_COUNT": 0.7,
    "NORMALIZED_EVENT_RECENCY": 0.3,
}

# Weighted engagement score per member and event category.
engagement_metrics_df["ENGAGEMENT_SCORE"] = (
    engagement_metrics_df["EVENT_COUNT"] * weights["EVENT_COUNT"]
    + engagement_metrics_df["NORMALIZED_EVENT_RECENCY"] * weights["NORMALIZED_EVENT_RECENCY"]
)

# Round the engagement scores to 3 decimal places.
engagement_metrics_df["ENGAGEMENT_SCORE"] = engagement_metrics_df["ENGAGEMENT_SCORE"].round(3)

# Overall engagement per event category: the mean of the per-member scores,
# ordered from highest to lowest and rounded to 3 decimal places.
overall_engagement_df = (
    engagement_metrics_df.groupby("EVENT_CATEGORY")["ENGAGEMENT_SCORE"]
    .mean()
    .rename("OVERALL_ENGAGEMENT_SCORE")
    .reset_index()
    .sort_values("OVERALL_ENGAGEMENT_SCORE", ascending=False)
    .round({"OVERALL_ENGAGEMENT_SCORE": 3})
)

In [3]:
# Render the overall engagement table as Markdown with reader-friendly headers.
overall_engagement_table = overall_engagement_df.rename(
    columns={
        "EVENT_CATEGORY": "Event Category",
        "OVERALL_ENGAGEMENT_SCORE": "Engagement Score",
    }
).to_markdown(index=False)

# Assemble the full Markdown report: heading, table, and result description.
report = f"""
# Overall Engagement

{overall_engagement_table}

## Result Description

The engagement scores for each event category were calculated from active member participation in calendar events since the inception of the club's use of Club Express.

The scores reflect the relative popularity and member involvement in each type of event. Higher scores indicate greater engagement, suggesting that events like Road Rides and Club Meetings are particularly well-received by members. Conversely, events such as Café Clubs and Tours have lower engagement scores, indicating less member interest or participation.

The results are sorted from most popular to least popular. The low scores indicate that only a subset of club members participate in or register for events such as club meetings.

To obtain more accurate data, it would be necessary for club members to register for all activities they participate in and to cancel their registration if they do not attend.
"""
# Display the report as rendered Markdown (formatted output) rather than raw Markdown syntax.
display(Markdown(report))


# Overall Engagement

| Event Category      |   Engagement Score |
|:--------------------|-------------------:|
| Road Rides          |              0.373 |
| Club Meetings       |              0.354 |
| International       |              0.266 |
| Rider Training      |              0.243 |
| GS Rides            |              0.237 |
| Getaways            |              0.229 |
| Tech Days           |              0.226 |
| Potluck Dinner      |              0.217 |
| Philanthropy        |              0.206 |
| Workshops           |              0.177 |
| 3rd Party           |              0.146 |
| Club Administration |              0.134 |
| Family Picnic       |              0.095 |
| Café Clubs          |              0.056 |
| Tours               |              0.036 |

## Result Description

The engagement scores for each event category were calculated from active member participation in calendar events since the inception of the club's use of Club Express.

The scores reflect the relative popularity and member involvement in each type of event. Higher scores indicate greater engagement, suggesting that events like Road Rides and Club Meetings are particularly well-received by members. Conversely, events such as Café Clubs and Tours have lower engagement scores, indicating less member interest or participation.

The results are sorted from most popular to least popular. The low scores indicate that only a subset of club members participate in or register for events such as club meetings.

To obtain more accurate data, it would be necessary for club members to register for all activities they participate in and to cancel their registration if they do not attend.


In [4]:
# Write the Markdown report to the path configured in `config`.
# The report contains non-ASCII text (e.g. "Café"), so the encoding is set
# explicitly; otherwise open() uses the platform's locale encoding and the
# file's bytes would differ between machines.
with open(config.OVERALL_ENGAGEMENT_REPORT, "w", encoding="utf-8") as file:
    file.write(report)