In [81]:
import pandas as pd
import re
import unicodedata

In [82]:


def _ascii(s):
    return unicodedata.normalize('NFKD', s).encode('ascii', 'ignore').decode('ascii')

def _standardize_name(s):
    """Turn an arbitrary column header into a short snake_case identifier.

    The value is coerced to ``str``, reduced to ASCII via ``_ascii`` and
    lower-cased. Every run of characters outside ``[a-z0-9]`` then becomes a
    single underscore, leading/trailing underscores are trimmed, and the result
    is cut to at most 48 characters.

    The cut happens after the trim, so the returned name may still end with an
    underscore when the 48th character is a separator.
    """
    name = _ascii(str(s)).lower()
    # Applied in order: line breaks -> space, non-alphanumeric runs -> "_",
    # repeated "_" -> single "_".
    substitutions = (
        (r'[\r\n]+', ' '),
        (r'[^a-z0-9]+', '_'),
        (r'_+', '_'),
    )
    for pattern, replacement in substitutions:
        name = re.sub(pattern, replacement, name)
    return name.strip('_')[:48]
    
def _map_short(norm: str) -> str:
    n = (norm or "").strip().lower()

    # Momod feedback mappings
    if "so_which_session_are_we_reviewing_now" in n:
        return "session"
    if "whats_the_date_of_the_session_being_reviewed" in n or "date_of_the_session" in n:
        return "date"
    if "according_to_momod_how_would_you_rate_the_hangou" in n:
        return "overall"
    if "can_you_share_why_you_gave_that_score" in n:
        return "overall_reason"
    if "who_were_the_participants_in_your_group" in n:
        return "group_participants"
    if "whats_your_rating_for_the_time_allocation_of_thi" in n:
        return "time_allocation"
    if "how_do_you_think_the_conversation_flow_was_durin" in n:
        return "conversation_flow"
    if "how_do_you_think_the_engagement_was_during_the_h" in n:
        return "engagement"
    if "could_you_explain_why_you_gave_the_above_rating_" in n:
        return "ratings_reason"
    if "any_concerns_that_needed_attention_in_the_hangou" in n:
        return "concerns"
    if "did_any_of_the_participants_in_your_hangout_grou" in n:
        return "participant_concerns"
    if "name_momod" in n:
        return "moderator_name"
    if n == "column_13":
        return "extra"

    if n == "timestamp":
        return "timestamp"
    return norm


def standardize_columns(df):
    """Return a copy of ``df`` with its columns renamed to short labels.

    Each header is normalized with ``_standardize_name`` and the result is
    passed through ``_map_short``. The input frame itself is not modified.
    """
    renamed = df.copy()
    renamed.columns = [_map_short(_standardize_name(col)) for col in df.columns]
    return renamed




In [83]:
# NOTE(review): absolute, machine-specific path -- adjust it (or move it to a
# configurable data directory) before running this notebook elsewhere.
file = "/home/yusuf/keminggris/data/regular_sesh/MOMOD - Session Feedback 1.0 (Responses) - Form Responses 1.csv"

df = pd.read_csv(file)
df = standardize_columns(df)
# errors="coerce": values that cannot be parsed become NaT instead of raising.
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
df["date"] = pd.to_datetime(df["date"], errors="coerce")

# Manual corrections of the session date for individual responses.
# Key   = the response's exact submission timestamp (used to locate the row).
# Value = the session date to store in "date" for that row.
# NOTE(review): the trailing comments presumably record the date value found in
# the CSV; five of six read 9/6/2025, which looks copy-pasted -- confirm.
fixes = {
    pd.Timestamp("2025-09-07 00:08:02"): pd.Timestamp("2025-09-06"),  # 9/7/2025
    pd.Timestamp("2025-09-08 06:54:43"): pd.Timestamp("2025-09-06"),  # 9/6/2025
    pd.Timestamp("2025-10-05 00:36:49"): pd.Timestamp("2025-10-04"),  # 9/6/2025
    pd.Timestamp("2025-10-11 07:45:55"): pd.Timestamp("2025-10-04"),  # 9/6/2025
    pd.Timestamp("2025-10-11 22:05:27"): pd.Timestamp("2025-10-04"),  # 9/6/2025
    pd.Timestamp("2025-10-19 00:34:46"): pd.Timestamp("2025-10-18"),  # 9/6/2025
}

# Apply only where the timestamp matches one of the keys; other rows keep
# the date parsed from the CSV.
mask = df["timestamp"].isin(list(fixes))
df.loc[mask, "date"] = df.loc[mask, "timestamp"].map(fixes)

# Derived date columns, computed after the fixes so they reflect corrected dates.
# (optional) keep a display string like your CSV
df["date_str"] = df["date"].dt.strftime("%m/%d/%Y")
# "date" is already datetime at this point, so it is reused instead of re-parsed.
df["date_parsed"] = df["date"]
df["session_label"] = df["date_parsed"].dt.strftime("%A, %d %B %Y")

# Overwrite the session name for responses that belong to specific dates.
map_session = {
    pd.Timestamp("2025-10-04"): "Regular Session",
}
mask = df["date"].isin(list(map_session))
df.loc[mask, "session"] = df.loc[mask, "date"].map(map_session)

# Numeric ratings: non-numeric answers become NaN; absent columns are skipped.
for c in ["overall", "time_allocation", "conversation_flow", "engagement"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")
# Session day/type
def _session_type(s: str) -> str:
    s = str(s or "")
    if "friday" in s.lower():
        return "Friday"
    if "regular" in s.lower():
        return "Regular"
    return "Other"
# Tag every response with its session type ("Friday" / "Regular" / "Other").
df["session_day"] = df["session"].apply(_session_type)

# Basic cleaning: keep only rows whose timestamp is present, then remove the
# two columns that are not kept in the final frame.
df = (
    df.dropna(subset=["timestamp"])
    .drop(columns=["extra", "participant_concerns"])
    .copy()
)


In [84]:
# Persist the cleaned feedback next to the notebook (path is relative to the
# current working directory). The DataFrame index is written as the first
# column because ``index`` is left at its pandas default.
df.to_csv('momod_feedback.csv')

In [85]:
# df[['timestamp', 'date', 'session']]



In [86]:
# Sanity check: number of responses per session date in the cleaned frame.
df['date'].value_counts()

date
2025-10-18    9
2025-09-06    7
2025-09-12    7
2025-09-20    6
2025-10-04    6
Name: count, dtype: int64

In [87]:
# Inspect the column names of the cleaned frame.
df.columns

Index(['timestamp', 'moderator_name', 'session', 'date', 'overall',
       'overall_reason', 'group_participants', 'time_allocation',
       'conversation_flow', 'engagement', 'ratings_reason', 'concerns',
       'date_str', 'date_parsed', 'session_label', 'session_day'],
      dtype='object')