<a href="https://colab.research.google.com/github/vanessajen/Sam-tracker-v4/blob/main/Tiktok_Reeditor_V11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# TikTok Shop T-Shirt Tracker v13 PRO + Re-Editor Sam Trendy 01 + Real Scraper TikTok Shop by Claude
# By Sam for Jenny 💛
# Auto: scrape 60 real T-shirt POD products from TikTok Shop → smart TikTok crawler (yt-dlp) → filter videos with ≥1k likes, no watermark → smart 15-20s cut → remix 2-3 clips → trendy music → auto thumbnail → full status report to Telegram

# === INSTALL LIBRARIES ===
# The leading "!" is an IPython/Colab shell escape, so these two lines only run
# inside a notebook; they are not valid plain-Python statements.
# playwright is imported further down in this file; yt-dlp is presumably used by
# the video-crawler part of the notebook that is not shown here — TODO confirm.
!pip install yt-dlp playwright
# Download the Chromium build that Playwright launches in TikTokShopScraper.scrape().
!playwright install chromium

# === CONNECT GOOGLE DRIVE ===
# Mount Drive at /content/drive; TRENDY_MUSIC_FOLDER points into this mount.
from google.colab import drive
drive.mount('/content/drive')

import requests
import pandas as pd
from datetime import datetime
import os
import json
import subprocess
import random
import time
import re
from typing import List, Dict, Any
from playwright.sync_api import sync_playwright, Page

# === CONFIG ===
# Relative paths resolve against the notebook's current working directory.
HISTORY_FOLDER = "sales_history"  # one JSON snapshot per day is stored here
EXPORT_CSV_PATH = "tshirt_growth_report.csv"
VIDEO_DOWNLOAD_FOLDER = "downloaded_videos"
FINAL_VIDEO_OUTPUT = "reup_video_today_trendy_cut_scene.mp4"
FINAL_VIDEO_WITH_MUSIC = "reup_video_final_with_music.mp4"
THUMBNAIL_IMAGE = "thumbnail.jpg"
TRENDY_MUSIC_FOLDER = "/content/drive/MyDrive/music"  # inside the mounted Google Drive
MAX_PRODUCTS = 60  # cap on the number of products the scraper keeps

# === TELEGRAM CONFIG ===
# SECURITY: these credentials were committed in plain text, so anyone who can
# read this file can control the bot. Revoke the token via @BotFather and supply
# the new values through the TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID environment
# variables. The literals below are kept only as a backward-compatible fallback
# (also used when the variable is set but empty) and should be removed once the
# environment is configured.
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN") or "8118142644:AAEVkSRX8ACgeCsCS9h3IjC8Kraf_v_poPY"
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID") or "1157657879"

# === CREATE FOLDER ===
# exist_ok=True makes re-running the cell harmless when the folders already exist.
os.makedirs(HISTORY_FOLDER, exist_ok=True)
os.makedirs(VIDEO_DOWNLOAD_FOLDER, exist_ok=True)
os.makedirs(TRENDY_MUSIC_FOLDER, exist_ok=True)

# === MUSIC FILES ===
# Full paths of every MP3 in the music folder. The extension check is
# case-insensitive so "Song.MP3" is not silently skipped, and the result is
# sorted because os.listdir() returns entries in arbitrary order.
# The list is empty when the folder contains no MP3 files.
TRENDY_MUSIC = sorted(
    os.path.join(TRENDY_MUSIC_FOLDER, f)
    for f in os.listdir(TRENDY_MUSIC_FOLDER)
    if f.lower().endswith('.mp3')
)

# === SMART ENGLISH HOOKS ===
# Short English caption lines. Two entries previously contained mojibake
# (UTF-8 bytes decoded as cp1252); the intended "✨" and "–" are restored so
# the garbled bytes never reach viewers.
HOOKS = [
    "You won't believe this T-shirt!",
    "This design is taking over the US!",
    "Top trending tee today ✨",
    "Everyone is obsessed with this shirt!",
    "The viral T-shirt you must have!",
    "Don't miss this amazing deal!",
    "The hardest shirt doesn't exist... or does it?",
    "Sorry for those who bought it yesterday – today it's on SALE!"
]

# === FUNCTION: Send telegram ===
def send_to_telegram(message):
    """Send a status message to the configured Telegram chat (best effort).

    The message is posted with ``parse_mode="HTML"``, so it must be valid
    Telegram HTML: literal ``<``, ``>`` and ``&`` have to be escaped by the
    caller, otherwise the API rejects the request.

    A failed notification is reported on stdout and never raises, so a
    Telegram outage cannot abort the pipeline.

    Args:
        message: Text to deliver.

    Returns:
        True when Telegram accepted the message, False otherwise.
    """
    url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage"
    payload = {"chat_id": TELEGRAM_CHAT_ID, "text": message, "parse_mode": "HTML"}
    try:
        # Without a timeout requests can block forever on a stalled connection.
        response = requests.post(url, data=payload, timeout=15)
    except requests.RequestException as exc:
        # Only the exception type is printed: the exception text usually
        # contains the request URL, which embeds the bot token.
        print(f"Telegram notification failed: {type(exc).__name__}")
        return False
    if not response.ok:
        print(f"Telegram API rejected the message (HTTP {response.status_code}): {response.text}")
        return False
    return True

# === SCRAPER CLASS ===
# Shared fragments of the desktop User-Agent strings below.
_WINDOWS = "Windows NT 10.0; Win64; x64"
_MACOS = "Macintosh; Intel Mac OS X 10_15_7"
_CHROME_121 = "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"

# Pool of browser identities; the scraper picks one at random per browser context.
USER_AGENTS = [
    f"Mozilla/5.0 ({_WINDOWS}) {_CHROME_121}",
    f"Mozilla/5.0 ({_MACOS}) {_CHROME_121}",
    f"Mozilla/5.0 (X11; Linux x86_64) {_CHROME_121}",
    f"Mozilla/5.0 ({_WINDOWS}; rv:109.0) Gecko/20100101 Firefox/121.0",
    f"Mozilla/5.0 ({_MACOS}) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
    f"Mozilla/5.0 ({_WINDOWS}) {_CHROME_121} Edg/121.0.0.0",
]

# Search terms tried against the TikTok search page.
SEARCH_QUERIES = [
    "t-shirt",
    "tshirt",
    "t shirt",
    "tee shirt",
    "graphic tee",
    "t-shirts",
    "cotton shirt",
    "casual shirt",
]

# URL suffixes appended after the search term; "" means "no extra parameter".
SORT_OPTIONS = [""] + [
    f"&sort={order}"
    for order in ("sales_high_to_low", "price_low_to_high", "recent")
]
FILTER_OPTIONS = [
    "",
    "&minPrice=10&maxPrice=30",
    "&rating=4up",
]

class TikTokShopScraper:
    """Collect product cards from TikTok search result pages using Playwright.

    Each ``scrape()`` call opens a Chromium browser, loads up to five randomly
    chosen search pages, extracts the product cards found on them and keeps at
    most ``max_products`` entries, de-duplicated by title.

    Each product is a dict with the keys ``title``, ``price``, ``image_url``
    and ``product_url`` (the last two may be ``None``).
    """

    def __init__(self, headless: bool = True, region: str = "US"):
        """Store the scraper settings.

        Args:
            headless: Passed to Chromium's ``launch``; False shows the window.
            region: Stored on the instance. NOTE: no method of this class
                reads it, so it currently has no effect on the requests.
        """
        self.headless = headless
        self.region = region
        self.products = []  # accumulates across scrape() calls on one instance
        self.max_products = MAX_PRODUCTS

    def get_random_user_agent(self):
        """Return one entry of USER_AGENTS chosen at random."""
        return random.choice(USER_AGENTS)

    def get_random_search_query(self):
        """Return a random search term followed by optional URL parameters.

        A sort suffix is always drawn from SORT_OPTIONS (which includes the
        empty string); a filter suffix is drawn only about 30% of the time.
        The term itself is returned unencoded, e.g. ``"t shirt&sort=recent"``.
        """
        base_query = random.choice(SEARCH_QUERIES)
        sort_option = random.choice(SORT_OPTIONS)
        filter_option = random.choice(FILTER_OPTIONS) if random.random() > 0.7 else ""
        return f"{base_query}{sort_option}{filter_option}"

    def random_sleep(self, min_sec=1.0, max_sec=3.5):
        """Block for a random duration between ``min_sec`` and ``max_sec`` seconds."""
        time.sleep(min_sec + random.random() * (max_sec - min_sec))

    @staticmethod
    def _build_search_url(search_query: str) -> str:
        """Turn a ``get_random_search_query()`` result into a valid search URL.

        Everything before the first ``&`` is the search term and gets
        URL-encoded (so "t shirt" becomes "t+shirt"); the remaining
        ``&key=value`` suffixes are appended untouched. A search term that
        itself contains ``&`` is therefore not supported.
        """
        from urllib.parse import quote_plus

        term, separator, extra_params = search_query.partition("&")
        return f"https://www.tiktok.com/search?q={quote_plus(term)}{separator}{extra_params}"

    @staticmethod
    def _dedupe_by_title(products: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return ``products`` without later entries that repeat an earlier title."""
        seen_titles = set()
        unique_products = []
        for product in products:
            if product['title'] not in seen_titles:
                seen_titles.add(product['title'])
                unique_products.append(product)
        return unique_products

    def extract_product_data(self, page: "Page"):
        """Read the product cards currently rendered on ``page``.

        Waits up to 15 seconds for the first product card, then collects
        title, price, image URL and link of every card inside the browser.

        Args:
            page: A Playwright page that has already navigated to a search URL.

        Returns:
            A list of product dicts that have a non-empty title and price.
            Any failure (including the wait timing out) is printed and
            results in an empty list.
        """
        try:
            page.wait_for_selector('[data-e2e="search-card-product-item"]', timeout=15000)
            # NOTE(review): the ".css-…" class names look auto-generated and
            # may change whenever TikTok redeploys — confirm they still match.
            products_data = page.evaluate("""
                () => {
                    const products = [];
                    const productCards = document.querySelectorAll('[data-e2e="search-card-product-item"]');
                    productCards.forEach(card => {
                        try {
                            const titleElement = card.querySelector('.css-1bjwylw');
                            const priceElement = card.querySelector('.css-1rhsq2v');
                            const imageElement = card.querySelector('img');
                            const linkElement = card.querySelector('a');
                            if (titleElement && priceElement) {
                                products.push({
                                    title: titleElement.textContent.trim(),
                                    price: priceElement.textContent.trim(),
                                    image_url: imageElement ? imageElement.src : null,
                                    product_url: linkElement ? linkElement.href : null
                                });
                            }
                        } catch (e) {}
                    });
                    return products;
                }
            """)
            # Cards whose title or price is empty after trimming are dropped.
            return [
                product for product in products_data
                if product and product.get('title') and product.get('price')
            ]
        except Exception as e:
            print(f"Error extracting product data: {e}")
            return []

    def scrape(self):
        """Run the scraping session and return the collected products.

        Loads random search pages until ``max_products`` unique products are
        collected or five attempts have been made. A failed attempt is
        printed and followed by a 10-15 second pause before the next one.

        Returns:
            ``self.products``: unique (by title) product dicts, at most
            ``max_products`` of them; empty when nothing could be extracted.
        """
        with sync_playwright() as playwright:
            browser = playwright.chromium.launch(headless=self.headless)
            try:
                context = browser.new_context(
                    user_agent=self.get_random_user_agent(),
                    viewport={'width': 1366 + random.randint(-50, 50), 'height': 768 + random.randint(-50, 50)},
                    locale=random.choice(['en-US', 'en-GB']),
                    timezone_id=random.choice(['America/New_York', 'Europe/London'])
                )
                page = context.new_page()
                attempts = 0
                max_attempts = 5

                while len(self.products) < self.max_products and attempts < max_attempts:
                    attempts += 1
                    search_url = self._build_search_url(self.get_random_search_query())
                    try:
                        page.goto(search_url, wait_until="domcontentloaded")
                        self.random_sleep(2, 4)
                        # Scroll a little on roughly half of the page loads.
                        if random.random() > 0.5:
                            for _ in range(random.randint(1, 3)):
                                page.mouse.wheel(0, random.randint(300, 700))
                                self.random_sleep(0.5, 1.5)
                        new_products = self.extract_product_data(page)
                        if new_products:
                            merged = self._dedupe_by_title(self.products + new_products)
                            self.products = merged[:self.max_products]
                    except Exception as e:
                        print(f"Error during scraping: {e}")
                        # Back off before retrying, but do not wait after the
                        # final attempt when nothing else will be requested.
                        if attempts < max_attempts:
                            self.random_sleep(10, 15)
            finally:
                # Close the browser even when setup or the loop raises.
                browser.close()

            return self.products

# === MAIN EXECUTION ===
# Load today's product snapshot if one exists; otherwise scrape and cache it.

today_str = datetime.today().strftime('%Y-%m-%d')
today_path = f"{HISTORY_FOLDER}/{today_str}.json"

today_data = []
if os.path.exists(today_path):
    # Explicit UTF-8 matches how the snapshot is written below.
    with open(today_path, encoding="utf-8") as f:
        today_data = json.load(f)

if today_data:
    send_to_telegram("✅ Found today's sales data. Proceeding...")
else:
    # Also reached when a snapshot file exists but is empty, so an earlier
    # failed scrape does not block retries for the rest of the day.
    send_to_telegram("🆕 No today's data found. Scraping real TikTok Shop products now!")
    scraper = TikTokShopScraper()
    today_data = scraper.scrape()
    if today_data:
        # ensure_ascii=False writes non-ASCII titles verbatim, so the file
        # must be opened as UTF-8 regardless of the platform default.
        with open(today_path, 'w', encoding="utf-8") as f:
            json.dump(today_data, f, ensure_ascii=False, indent=2)
        send_to_telegram("✅ Real TikTok Shop products scraped and saved.")
    else:
        # Do not cache an empty result: it would be mistaken for valid data.
        send_to_telegram("⚠️ Scraper returned no products; nothing was saved for today.")

# === (The rest of re-editor video + telegram report continues below...) ===
