In [5]:
import os
import json
from datetime import datetime
import pandas as pd

def _collect_daily_counts(folder_path):
    """Tally videos per day across every ``*.json`` file in *folder_path*.

    Each JSON file is expected to hold an object with a "VideoList" array
    whose entries carry a "Date" string; the text before the first space
    of that string is used as the day key.  Files without a "VideoList"
    and entries without a usable "Date" are skipped rather than aborting
    the whole run.

    Returns:
        dict mapping the day string to the number of entries seen for it.

    Raises:
        OSError: if the folder or one of its JSON files cannot be read.
        json.JSONDecodeError: if a ``.json`` file is not valid JSON.
    """
    daily_counts = {}

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.json'):
            continue

        file_path = os.path.join(folder_path, file_name)
        # Explicit UTF-8: the platform default (e.g. cp1252 on Windows)
        # fails on non-ASCII text inside the JSON.
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Top-level arrays/scalars cannot carry a "VideoList" key.
        if not isinstance(data, dict):
            continue
        video_list = data.get("VideoList")
        if not isinstance(video_list, list):
            continue

        for entry in video_list:
            raw_date = entry.get("Date") if isinstance(entry, dict) else None
            if not isinstance(raw_date, str):
                continue
            day = raw_date.split(" ")[0]
            if day:
                daily_counts[day] = daily_counts.get(day, 0) + 1

    return daily_counts


def count_videos_in_day(folder_path):
    """Summarise how many videos were watched per day and write a report.

    Reads every ``*.json`` file in *folder_path*, counts "VideoList"
    entries per day, and writes ``video_statistics.txt`` into the same
    folder.  The report lists the daily counts (chronologically), the
    average daily count per month, the busiest day of each month, and the
    busiest day overall.  A one-line status message is printed either way.

    Args:
        folder_path: directory containing the JSON files; also the
            directory the report is written to.

    Returns:
        None.

    Raises:
        OSError: if the folder, a JSON file, or the report cannot be
            opened.
        json.JSONDecodeError: if a ``.json`` file is not valid JSON.
        ValueError: if a collected day string cannot be parsed by
            ``pandas.to_datetime``.
    """
    video_counts = _collect_daily_counts(folder_path)
    if not video_counts:
        print("No valid data found in the JSON files.")
        return

    # One row per day, in chronological order.
    df = pd.DataFrame(list(video_counts.items()), columns=["Date", "VideoCount"])
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.sort_values(by="Date")

    # Monthly statistics.
    df['Month'] = df['Date'].dt.to_period('M')
    counts_by_month = df.groupby('Month')["VideoCount"]
    monthly_averages = counts_by_month.mean()
    # idxmax yields the row label of each month's busiest day; selecting
    # those rows keeps the 'Month' column and avoids
    # DataFrameGroupBy.apply, which is deprecated when it touches the
    # grouping column.
    monthly_max = df.loc[counts_by_month.idxmax()]

    # Overall busiest day.
    overall_max = df.loc[df["VideoCount"].idxmax()]

    output_file = os.path.join(folder_path, "video_statistics.txt")
    with open(output_file, "w", encoding="utf-8") as f:
        # Daily statistics
        for _, row in df.iterrows():
            f.write(f"{row['Date'].strftime('%Y-%m-%d')} = {row['VideoCount']} videos watched\n")

        # Monthly average statistics
        f.write("\nMonthly Averages:\n")
        for month, avg in monthly_averages.items():
            f.write(f"{month} = {avg:.2f} videos on average\n")

        # Busiest day of each month
        f.write("\nMonthly Maximums:\n")
        for _, row in monthly_max.iterrows():
            f.write(f"{row['Month']} max: {row['VideoCount']} videos watched on {row['Date'].strftime('%Y-%m-%d')}\n")

        # Busiest day overall
        f.write("\nOverall Maximum:\n")
        f.write(f"{overall_max['VideoCount']} videos watched on {overall_max['Date'].strftime('%Y-%m-%d')}\n")

    print(f"Summary written to: {output_file}")

# Folder that holds the JSON files to analyse
folder_path = "output_by_month"

# Run the analysis on that folder
count_videos_in_day(folder_path=folder_path)


Summary written to: output_by_month/video_statistics.txt


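[stderr: warning text truncated in this export; only the flagged source line was captured. Most likely the pandas DeprecationWarning about DataFrameGroupBy.apply operating on the grouping columns.]
  monthly_max = df.groupby('Month').apply(lambda x: x.loc[x["VideoCount"].idxmax()])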
