In [3]:
import os
import json
from datetime import datetime, timedelta
from collections import defaultdict
import csv

# Directory containing monthly watch history files
directory = "output_by_month"

# Path to the cycle data file
cycle_data_file = "cycle_data.json"

# Destination CSV for the combined per-day report
output_csv_file = "combined_watch_time.csv"

# Timestamp layouts used by the cycle file and the watch history files
CYCLE_DATE_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
WATCH_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Assuming each video is 1 minute (the history entries carry no duration here)
MINUTES_PER_VIDEO = 1


def load_json(path: str):
    """Return the parsed content of the JSON file at *path*.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform default encoding, which is not UTF-8 on every system.
    """
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def parse_cycles(cycle_data) -> list:
    """Convert raw cycle records into ``(start, end)`` datetime pairs.

    Each record must provide ``period_start_date`` and ``period_end_date``
    strings formatted as ``CYCLE_DATE_FORMAT``.

    Raises:
        KeyError: if a record lacks one of the two keys.
        ValueError: if a timestamp does not match ``CYCLE_DATE_FORMAT``.
    """
    return [
        (
            datetime.strptime(cycle["period_start_date"], CYCLE_DATE_FORMAT),
            datetime.strptime(cycle["period_end_date"], CYCLE_DATE_FORMAT),
        )
        for cycle in cycle_data
    ]


def count_videos_per_day(directory: str) -> dict:
    """Count watched videos per calendar day across every ``.json`` file.

    Counts are accumulated over all files together, so a day whose entries
    are spread over more than one file still ends up as a single total.
    Files without a ``VideoList`` key contribute nothing.
    """
    videos_per_day = defaultdict(int)
    # Sorted only to make the processing order deterministic.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json"):
            continue
        data = load_json(os.path.join(directory, filename))
        for entry in data.get("VideoList", []):
            watched_at = datetime.strptime(entry["Date"], WATCH_DATE_FORMAT)
            videos_per_day[watched_at.date()] += 1
    return videos_per_day


def build_rows(videos_per_day, cycles) -> list:
    """Build date-ordered ``(day, watch_hours, "Yes"/"No")`` report rows.

    A day is flagged ``"Yes"`` when it lies between the start date and the
    end date (both inclusive) of at least one cycle.
    """
    # Reduce the cycle bounds to dates once instead of once per comparison.
    cycle_ranges = [(start.date(), end.date()) for start, end in cycles]
    rows = []
    for day in sorted(videos_per_day):
        in_cycle = any(first <= day <= last for first, last in cycle_ranges)
        # Convert the integer count in one step rather than summing 1/60
        # repeatedly, which would accumulate floating-point error.
        hours = round(videos_per_day[day] * MINUTES_PER_VIDEO / 60, 2)
        rows.append((day, hours, "Yes" if in_cycle else "No"))
    return rows


def combine_watch_history(directory: str, cycles) -> list:
    """Return the combined report rows for every history file in *directory*.

    When *directory* does not exist a notice is printed and an empty list
    is returned.
    """
    if not os.path.exists(directory):
        print(f"Directory '{directory}' not found. Please ensure it exists and contains the data files.")
        return []
    return build_rows(count_videos_per_day(directory), cycles)


def write_rows(rows, csv_path: str) -> None:
    """Write the header line followed by *rows* to the CSV file *csv_path*."""
    with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Date", "Watch Hours", "Cycle Day"])  # Write header row
        writer.writerows(rows)


# Runs when executed as a script or as a notebook cell (both use "__main__"),
# but not when the definitions above are merely imported.
if __name__ == "__main__":
    # Load cycle data and parse cycle start and end dates
    cycle_data = load_json(cycle_data_file)
    cycles = parse_cycles(cycle_data)

    # Collect all data, already sorted by date
    all_data = combine_watch_history(directory, cycles)

    # Write all data to a single CSV file (header only if nothing was found)
    write_rows(all_data, output_csv_file)

    print(f"All data combined, ordered, and saved to {output_csv_file}.")


All data combined, ordered, and saved to combined_watch_time.csv.
