In [2]:
import csv
import json
from collections import defaultdict
from datetime import datetime

# File locations: two inputs (daily watch time, cycle records) and the report
# that this script produces.
watch_time_csv = "daily_watch_time.csv"
cycle_data_file = "cycle_data.json"
output_file = "formatted_watch_time.csv"

# Read the cycle records; the JSON document is iterated below as a sequence
# of mappings.
with open(cycle_data_file, "r", encoding="utf-8") as cycle_fh:
    cycle_data = json.load(cycle_fh)

# Reduce every record to a (start_date, end_date) pair of plain dates.
# Both timestamp fields must match "%Y-%m-%d %H:%M:%S.%f"; the time-of-day
# part is dropped by .date().
cycles = []
for record in cycle_data:
    first_day = datetime.strptime(
        record["period_start_date"], "%Y-%m-%d %H:%M:%S.%f"
    ).date()
    last_day = datetime.strptime(
        record["period_end_date"], "%Y-%m-%d %H:%M:%S.%f"
    ).date()
    cycles.append((first_day, last_day))

# Load watch time data as a list of (date, hours) tuples.
# Column 0 of each row is parsed as a "%Y-%m-%d" date and column 1 as a float.
watch_time = []
# newline="" hands line-ending handling to the csv module, as its docs recommend.
with open(watch_time_csv, "r", encoding="utf-8", newline="") as file:
    reader = csv.reader(file)
    # Skip header; the default keeps an empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing empty
            # line); indexing such a row would raise IndexError.
            continue
        date = datetime.strptime(row[0], "%Y-%m-%d").date()
        hours = float(row[1])
        watch_time.append((date, hours))

# Tag each (date, hours) entry with "Yes" when the date lies inside at least
# one cycle (start and end dates both count as inside), otherwise "No".
formatted_data = [
    (
        day,
        watched,
        "Yes" if any(first <= day <= last for first, last in cycles) else "No",
    )
    for day, watched in watch_time
]

# Bucket the tagged rows under a "YYYY-MM" key, preserving their input order
# within each month.
monthly_data = defaultdict(list)
for day, watched, flag in formatted_data:
    monthly_data[f"{day:%Y-%m}"].append((day, watched, flag))

# Per-month mean watch hours, plus running totals used for the two overall
# figures: the mean over every day and the mean over in-cycle days only.
monthly_averages = {}
overall_hours = overall_days = 0
overall_cycle_hours = overall_cycle_days = 0

for month_key, entries in monthly_data.items():
    month_total = sum(entry[1] for entry in entries)
    month_count = len(entries)
    monthly_averages[month_key] = month_total / month_count

    overall_hours += month_total
    overall_days += month_count

    for entry in entries:
        if entry[2] != "Yes":
            continue
        overall_cycle_hours += entry[1]
        overall_cycle_days += 1

# Fall back to 0 when there is nothing to average, avoiding a division by zero.
if overall_days:
    overall_average = overall_hours / overall_days
else:
    overall_average = 0

if overall_cycle_days:
    overall_cycle_average = overall_cycle_hours / overall_cycle_days
else:
    overall_cycle_average = 0

# Write the report: a header row, two overall-average rows, then for each
# month (in sorted key order) a one-cell summary row followed by that month's
# day rows.
# The encoding is given explicitly so the output matches the UTF-8 used when
# reading the inputs instead of depending on the platform default.
with open(output_file, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    # NOTE(review): the header names three columns, while each day row below
    # carries a fourth, unlabeled field — confirm whether that is intended.
    writer.writerow(["Date", "Watch Hours", "In Cycle"])
    writer.writerow(["Overall Average", f"{overall_average:.2f} Hours"])
    writer.writerow(["Overall Cycle Average", f"{overall_cycle_average:.2f} Hours"])

    for month, data in sorted(monthly_data.items()):
        writer.writerow([f"{month} - Average {monthly_averages[month]:.2f} Hours"])
        for date, hours, in_cycle in data:
            # Fourth field: the overall in-cycle average for in-cycle days,
            # otherwise the month's own average, written with a leading "-".
            avg = overall_cycle_average if in_cycle == "Yes" else monthly_averages[month]
            writer.writerow([date, f"{hours:.2f}", in_cycle, f"-{avg:.2f}"])

print(f"Formatted watch time has been saved to {output_file}.")


Formatted watch time has been saved to formatted_watch_time.csv.
