In [1]:
import json
from datetime import datetime
from pathlib import Path

# Split the "Video Browsing History" entries of a JSON export into one JSON
# file per calendar year, based on each entry's "Date" field.

# Input file containing the data
input_file = "cleaned_tiktok_data.json"  # Replace with your actual file name

# Target years; entries dated in any other year are dropped
years = [2024, 2023, 2022, 2021, 2020, 2019]
output_files = {year: f"output_by_year/{year}.json" for year in years}

# One (initially empty) list of video entries per target year
year_data = {year: [] for year in years}

try:
    # Load the main input JSON file
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    print(f"Error: Input file '{input_file}' not found.")
    # SystemExit instead of exit(): exit() is an interactive helper added by
    # the `site` module, and a failure should end with a non-zero status.
    raise SystemExit(1)
except json.JSONDecodeError as e:
    print(f"Error: Failed to parse JSON file. {e}")
    raise SystemExit(1)

# Extract "video browsing history" from the JSON structure
video_list = data.get("Activity", {}).get("Video Browsing History", {}).get("VideoList", [])

if not video_list:
    print("No video browsing history found in the provided JSON file.")
    # Nothing to split; stop without writing any per-year files.
    raise SystemExit(0)

# Process each video entry and organize by year
for video in video_list:
    date_str = video.get("Date")
    if not date_str:
        # Entries without a date cannot be assigned to a year; skip them.
        continue
    try:
        year = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S").year
    except (ValueError, TypeError) as e:
        # ValueError: string does not match the expected format.
        # TypeError: "Date" is present but is not a string.
        # Either way, report it and keep processing the remaining entries.
        print(f"Error parsing date '{date_str}': {e}")
        continue
    if year in year_data:  # Keep only entries from the target years
        year_data[year].append(video)

# Write data to the respective year files
failed_years = []
for year, entries in year_data.items():
    try:
        # The output directory may not exist yet (e.g. on a first run).
        Path(output_files[year]).parent.mkdir(parents=True, exist_ok=True)
        with open(output_files[year], 'w', encoding='utf-8') as f:
            json.dump({"VideoList": entries}, f, indent=4)
        print(f"Data for {year} written to {output_files[year]}")
    except OSError as e:
        # Best effort: report the failure and continue with the other years.
        print(f"Error writing file '{output_files[year]}': {e}")
        failed_years.append(year)

if failed_years:
    # Do not claim success when some of the output files were not written.
    print(f"Data separation finished with errors; failed years: {failed_years}")
else:
    print("Data separation by year complete!")


Data for 2024 written to output_by_year/2024.json
Data for 2023 written to output_by_year/2023.json
Data for 2022 written to output_by_year/2022.json
Data for 2021 written to output_by_year/2021.json
Data for 2020 written to output_by_year/2020.json
Data for 2019 written to output_by_year/2019.json
Data separation by year complete!
