In [4]:
import json
from datetime import datetime
from pathlib import Path

# Split a TikTok data export's video browsing history into one JSON file per
# calendar year, based on each entry's "Date" field.

# Input/output locations used by the script.
input_file = "cleaned_tiktok_data.json"  # Ensure this file exists in the root folder of your Codespace
output_dir = "output_by_year"  # Directory to save year-based files

# Timestamp layout expected in each entry's "Date" field.
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def load_json(path):
    """Return the parsed contents of the UTF-8 encoded JSON file at *path*.

    Raises:
        FileNotFoundError: if *path* does not exist.
        json.JSONDecodeError: if the file content is not valid JSON.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def extract_video_list(data):
    """Return the list stored at Activity -> Video Browsing History -> VideoList.

    Any deviation from the expected shape (top-level value is not an object,
    a key is missing or holds null, the final value is not a list) yields an
    empty list instead of raising, so the caller has one "nothing to do" case.
    """
    node = data
    for key in ("Activity", "Video Browsing History", "VideoList"):
        if not isinstance(node, dict):
            return []
        node = node.get(key)
    return node if isinstance(node, list) else []


def group_videos_by_year(video_list):
    """Group video entries by the year parsed from their "Date" field.

    Entries without a (truthy) "Date" are skipped silently. Entries that are
    not JSON objects, or whose date cannot be parsed with DATE_FORMAT, are
    reported on stdout and skipped.

    Returns:
        dict mapping year (int) to the list of entries for that year, with
        years and entries in order of first appearance.
    """
    videos_by_year = {}
    for video in video_list:
        if not isinstance(video, dict):
            print(f"Skipping entry that is not a JSON object: {video!r}")
            continue
        date_str = video.get("Date")
        if not date_str:
            continue
        try:
            # Only the parse can raise; TypeError covers a non-string "Date".
            year = datetime.strptime(date_str, DATE_FORMAT).year
        except (ValueError, TypeError) as e:
            print(f"Error parsing date '{date_str}': {e}")
            continue
        videos_by_year.setdefault(year, []).append(video)
    return videos_by_year


def write_year_files(videos_by_year, output_dir):
    """Write each year's entries to ``<output_dir>/<year>.json``.

    The directory is created if needed. Each file holds
    ``{"VideoList": [...]}`` indented by four spaces. A failure to write one
    file is reported on stdout and does not stop the remaining years.

    Returns:
        list of the paths that were written successfully.
    """
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    written = []
    for year, videos in videos_by_year.items():
        output_file = out_path / f"{year}.json"
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump({"VideoList": videos}, f, indent=4)
        except OSError as e:
            print(f"Error writing file '{output_file}': {e}")
        else:
            print(f"File written: {output_file}")
            written.append(output_file)
    return written


# The orchestration stays at module level (not inside a main() function) so
# that data, video_list and videos_by_year remain global names when this is
# run as a notebook cell or a script.
if __name__ == "__main__":
    # SystemExit is raised directly instead of calling exit(): the exit()
    # helper is installed by the site module for interactive use and is not
    # guaranteed to exist in every interpreter configuration.
    try:
        data = load_json(input_file)
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found.")
        raise SystemExit(1)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error: Failed to parse JSON file. {e}")
        raise SystemExit(1)

    # Navigate to the "video browsing history" under "activity"
    video_list = extract_video_list(data)
    if not video_list:
        print("No video browsing history found in the provided JSON file.")
        raise SystemExit(0)

    # Organize videos by year, then write each year's data to its own file.
    videos_by_year = group_videos_by_year(video_list)
    written_files = write_year_files(videos_by_year, output_dir)

    # Do not claim success if any year's file could not be written; the
    # individual errors have already been printed.
    if len(written_files) != len(videos_by_year):
        raise SystemExit(1)

    print(f"Data has been organized and saved in the directory '{output_dir}'.")


File written: output_by_year/2024.json
Data has been organized and saved in the directory 'output_by_year'.
