In [14]:
import asyncio
import json
from playwright.async_api import async_playwright

async def fetch_description(page, video_url):
    """Load a video page and extract the caption from its description meta tag.

    The Open Graph ``og:description`` tag is consulted first; if it is missing
    or empty, ``meta[name="description"]`` is used instead. The caption is the
    text following the first colon that appears at or after the phrase
    "TikTok video from".

    Args:
        page: Page-like object exposing awaitable ``goto`` and
            ``query_selector`` methods (a Playwright page in this file).
        video_url: URL to navigate to.

    Returns:
        The caption with surrounding whitespace stripped, or ``None`` when no
        usable meta content, marker phrase, or colon is found, or when any
        exception is raised while loading/reading the page (the error is
        printed to stdout).
    """
    marker = "TikTok video from"
    selectors = (
        'meta[property="og:description"]',  # preferred source
        'meta[name="description"]',         # fallback
    )
    try:
        await page.goto(video_url, timeout=30000)

        content = ""
        for selector in selectors:
            element = await page.query_selector(selector)
            if element is not None:
                # get_attribute returns None when the attribute is absent;
                # normalise to "" so the substring search below cannot raise.
                content = await element.get_attribute("content") or ""
            if content:
                break

        marker_index = content.find(marker)
        if marker_index == -1:
            return None

        # The text between the marker and the colon is the author name.
        colon_index = content.find(":", marker_index)
        if colon_index == -1:
            return None
        return content[colon_index + 1:].strip()
    except Exception as e:
        # Best-effort scraping: one failing page must not abort the batch.
        print(f"Error fetching {video_url}: {e}")
        return None

async def process_videos(input_file, output_file, concurrency=5):
    """Scrape a description for every video in a JSON list and save the results.

    Reads ``input_file`` (a JSON object whose ``"VideoList"`` entry is a list
    of objects with ``"Link"`` and ``"Date"`` keys), fetches each link's
    description with ``fetch_description`` in a headless Chromium browser, and
    writes ``{"VideoDescriptions": [...]}`` to ``output_file``.

    Entries without a ``"Link"`` are skipped. Output entries appear in the
    same order as the input list, regardless of which page finishes first.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
        concurrency: Maximum number of pages open at the same time.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(input_file, "r", encoding="utf-8") as file:
        data = json.load(file)

    videos = data.get("VideoList", [])
    semaphore = asyncio.Semaphore(concurrency)

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context()

            async def process_video(video):
                """Return the result entry for one video, or None if it has no link."""
                link = video.get("Link")
                if not link:
                    return None
                async with semaphore:
                    print(f"Processing: {link}")
                    page = await context.new_page()
                    try:
                        description = await fetch_description(page, link)
                    finally:
                        # Always release the page, even if the fetch raises.
                        await page.close()
                return {
                    "Date": video.get("Date"),
                    "Link": link,
                    "Description": description,
                }

            # gather returns results in argument order, which keeps the output
            # deterministic instead of depending on completion order.
            entries = await asyncio.gather(
                *(process_video(video) for video in videos)
            )
        finally:
            await browser.close()

    results = {
        "VideoDescriptions": [entry for entry in entries if entry is not None]
    }

    with open(output_file, "w", encoding="utf-8") as file:
        json.dump(results, file, indent=4)
    print(f"Saved to {output_file}")

# === Set your file paths below ===
# Both paths are relative, so they resolve against the current working
# directory of the running interpreter/kernel.

input_file = "separated_10/2.json"         # JSON file read by process_videos
output_file = "separated_10/scraped_2.json"  # JSON file written by process_videos

# === Run the coroutine ===
# NOTE: this is a top-level `await`, which only works where an event loop is
# already running (this appears to be a notebook/IPython cell). A plain
# Python script would need asyncio.run(process_videos(...)) instead.

await process_videos(input_file, output_file)


Processing: https://www.tiktokv.com/share/video/7421203634249157906/
Processing: https://www.tiktokv.com/share/video/7420442546020601131/
Processing: https://www.tiktokv.com/share/video/7418962504203701505/
Processing: https://www.tiktokv.com/share/video/7420842557858417927/
Processing: https://www.tiktokv.com/share/video/7420903934836919584/
Processing: https://www.tiktokv.com/share/video/7421212491402792234/
Processing: https://www.tiktokv.com/share/video/7421118786956250401/
Processing: https://www.tiktokv.com/share/video/7420631792153103623/
Processing: https://www.tiktokv.com/share/video/7322413775196409093/
Processing: https://www.tiktokv.com/share/video/7405623352020274462/
Processing: https://www.tiktokv.com/share/video/7407059618620345632/
Processing: https://www.tiktokv.com/share/video/7419373267426757906/
Processing: https://www.tiktokv.com/share/video/7420469475452931371/
Processing: https://www.tiktokv.com/share/video/7415279455297342752/
Processing: https://www.tiktokv.co