# Get data from the Sema dataset

In [None]:
from pymongo import MongoClient
from bson import ObjectId
import pandas as pd

In [None]:
# Connect to the MongoDB server at localhost:27017.
client = MongoClient("mongodb://localhost:27017")

# Handles for the "lifesnaps" database and its "sema" collection.
db = client.get_database("lifesnaps")
collection = db.get_collection("sema")

# Fetch and print one document whose "data" field equals the string "place";
# find_one returns None when nothing matches.
doc = collection.find_one({"data": "place"})
print(doc)

In [None]:
from datetime import datetime, timedelta

# Half-open one-day window: [2021-11-16 00:00, 2021-11-17 00:00).
# NOTE(review): these are naive datetimes — confirm how CREATED_TS is stored.
date_to_filter = datetime(2021, 11, 16)
next_day = date_to_filter + timedelta(days=1)
created_window = {"$gte": date_to_filter, "$lt": next_day}

# Filter: a single user_id, one survey name, created inside the window above.
query = {
    "user_id": ObjectId("621e2f9167b776a240011ccb"),
    "data.SURVEY_NAME": "Context and Mood Survey",
    "data.CREATED_TS": created_window,
}

In [None]:
# Projection: request only user_id, the scheduled timestamp and the mood.
projection = {"user_id": 1, "data.SCHEDULED_TS": 1, "data.MOOD": 1}
docs = collection.find(query, projection)

# Print every matching document; iterating consumes the cursor.
for doc in docs:
    print(doc)

In [None]:
# Broader filter: same user_id and survey name as before, but with no
# restriction on the creation timestamp.
survey_user_id = ObjectId("621e2f9167b776a240011ccb")
survey_name = "Context and Mood Survey"
query = {"user_id": survey_user_id, "data.SURVEY_NAME": survey_name}

In [None]:
import pandas as pd

# Fetch every document matching `query` (which must have been defined in an
# earlier cell) and load them into a DataFrame.
records = list(collection.find(query))
df = pd.DataFrame(records)

# Flatten the "data" column into separate columns
# (assumes each "data" value is already a dict — TODO confirm).
data_expanded = pd.json_normalize(df["data"])

# Put the original identifier columns next to the flattened survey columns.
id_columns = df[["_id", "user_id"]]
df = pd.concat([id_columns, data_expanded], axis=1)

# Export to CSV without the row index.
df.to_csv("semaSurvey.csv", index=False)

# Show the first few rows.
df.head()

# Data selection
Select only the mood responses and assign each one a score, then sum the scores per day.

In [None]:
# Take an explicit copy of the two columns we need. Without .copy(), adding a
# column to this slice of `df` below triggers pandas' SettingWithCopyWarning.
data = df[['CREATED_TS', 'MOOD']].copy()

# Score assigned to each mood label: 1, 0 or -1; None means "no score".
mood_map = {
    '<no-response>': None,
    'ALERT': 1,
    'ANGER': -1,
    'FEAR': -1,
    'HAPPY': 1,
    'JOY': 1,
    'NEUTRAL': 0,
    'RESTED/RELAXED': 1,
    'SAD': -1,
    'SADNESS': -1,
    'SURPRISE': 0,
    'TENSE/ANXIOUS': -1,
    'TIRED': -1,
    None: None  # for actual null values
}

# Translate each mood label into its score.
data['mood_score'] = data['MOOD'].map(mood_map)

In [None]:
# Parse the creation timestamps once so the .dt accessor is available.
created = pd.to_datetime(data["CREATED_TS"])
data["CREATED_TS"] = created

# Add a column holding only the calendar date (time of day dropped).
data["date"] = created.dt.date

# Group by date and sum the mood scores; the result has the columns
# "date" and "mood_score".
daily_scores = data.groupby("date", as_index=False)["mood_score"].sum()

daily_scores

In [None]:
import os

# DataFrame.to_csv does not create missing directories, so make sure the
# "csv" output folder exists before writing.
os.makedirs("csv", exist_ok=True)
daily_scores.to_csv("csv/Mood.csv", index=False)

In [None]:
import matplotlib.pyplot as plt

# Make sure the dates are in chronological order. The daily aggregation
# produces the columns "date" and "mood_score" (lower case); the previous
# "DATE" / "MOOD_Score" lookups raised KeyError.
daily_scores = daily_scores.sort_values("date")

# Plot the summed mood score for each day.
plt.figure(figsize=(12, 6))
plt.plot(
    daily_scores["date"],
    daily_scores["mood_score"],
    marker='o',
    linestyle='-',
    color='purple',
)
plt.title("Mood Score per Dag (Som van Scores)")
plt.xlabel("Datum")
plt.ylabel("Mood Score")
plt.xticks(rotation=45)  # slant the date labels so they do not overlap
plt.grid(True)
plt.tight_layout()
plt.show()
