In [2]:
import bisect
import csv
import json
import os
from datetime import datetime, timedelta

# Inputs: the login-history export and the folder that holds the
# watch-history JSON files (presumably one per month, going by its name).
watch_history_folder = "output_by_month"
login_history_file = "other_data_parts/login_history.json"

# Output: CSV that receives the per-day watch-time totals.
output_csv_file = "daily_watch_time2.csv"

# Load login history.
# The file must be a JSON object whose "LoginHistoryList" entry is a list of
# records, each carrying a "Date" string. JSON text is UTF-8, so the encoding
# is pinned instead of relying on the platform default, which is not UTF-8
# on every system.
with open(login_history_file, "r", encoding="utf-8") as file:
    login_data = json.load(file)["LoginHistoryList"]

# Parse login timestamps ("YYYY-MM-DD HH:MM:SS") into naive datetimes and
# order them oldest first; the rest of the script relies on this ordering.
login_sessions = sorted(
    datetime.strptime(login["Date"], "%Y-%m-%d %H:%M:%S")
    for login in login_data
)

# Collect video watch data from all months.
# Every file directly inside the folder whose name ends in ".json" is read;
# each must be a JSON object with a "VideoList" array of records that carry
# a "Date" string.
video_watches = []
for filename in os.listdir(watch_history_folder):
    if not filename.endswith(".json"):
        continue
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(
        os.path.join(watch_history_folder, filename), "r", encoding="utf-8"
    ) as file:
        data = json.load(file)
    video_watches.extend(data["VideoList"])

# Parse video timestamps ("YYYY-MM-DD HH:MM:SS") into naive datetimes,
# oldest first. os.listdir() returns names in arbitrary order, so the sort
# is what makes the result deterministic.
video_timestamps = sorted(
    datetime.strptime(video["Date"], "%Y-%m-%d %H:%M:%S")
    for video in video_watches
)

# Calculate session-based watch time.
#
# A session runs from one login to the next. The most recent login has no
# successor, so its session is open-ended and takes every remaining video
# (previously those videos were silently dropped). Within a session, watch
# time is the span from the first to the last video timestamp inside it
# (both session boundaries inclusive), credited to each calendar day the
# span touches.
def _add_hours_by_day(totals, span_start, span_end):
    """Add the hours between *span_start* and *span_end* to *totals* per day.

    The span is cut at every midnight it crosses so that each calendar day
    receives only the hours that fall on it. This also covers spans longer
    than two days, where the days in between each receive a full 24 hours.

    Args:
        totals: dict mapping ``datetime.date`` to accumulated hours;
            updated in place.
        span_start: naive ``datetime`` at which the span begins.
        span_end: naive ``datetime`` at which the span ends; must not be
            earlier than *span_start*.
    """
    cursor = span_start
    last_day = span_end.date()
    while cursor.date() < last_day:
        day = cursor.date()
        next_midnight = datetime.combine(
            day + timedelta(days=1), datetime.min.time()
        )
        totals[day] = totals.get(day, 0) + (
            next_midnight - cursor
        ).total_seconds() / 3600
        cursor = next_midnight
    # Remainder on the final day. For a zero-length span this still records
    # the day with 0 hours, so a day with one isolated video appears in the
    # output.
    totals[last_day] = totals.get(last_day, 0) + (
        span_end - cursor
    ).total_seconds() / 3600


daily_watch_time = {}

for i, session_start in enumerate(login_sessions):
    # Both timestamp lists are sorted, so the videos of a session form one
    # contiguous slice that bisect locates without rescanning the whole list.
    first = bisect.bisect_left(video_timestamps, session_start)
    if i + 1 < len(login_sessions):
        # bisect_right keeps a video stamped exactly at the next login
        # inside this session (inclusive upper bound).
        stop = bisect.bisect_right(video_timestamps, login_sessions[i + 1])
    else:
        # Last login: no closing boundary, take everything that follows.
        stop = len(video_timestamps)

    if first >= stop:
        # No video was watched during this session.
        continue

    _add_hours_by_day(
        daily_watch_time,
        video_timestamps[first],      # earliest video in the session
        video_timestamps[stop - 1],   # latest video in the session
    )

# Write daily totals to CSV: a header row, then one row per calendar day in
# ascending date order, with hours rounded to two decimal places.
with open(output_csv_file, "w", newline="") as csvfile:
    out = csv.writer(csvfile)
    out.writerow(["Date", "Watch Hours"])
    out.writerows(
        [day, round(total_hours, 2)]
        for day, total_hours in sorted(daily_watch_time.items())
    )

print(f"Daily watch time has been saved to {output_csv_file}.")


Daily watch time has been saved to daily_watch_time2.csv.
