In [7]:
import os
import pandas as pd
from openpyxl import Workbook
import json
import requests
from deepface import DeepFace
from apify_client import ApifyClient
import shutil
from openai import OpenAI
from utils import process_single_collaborator
from datetime import datetime

import unicodedata
import re
import pandas as pd
from typing import Dict, List, Set

def clean_filename(filename):
    """Return ``filename`` trimmed, NFKC-normalized and free of non-printable characters.

    Steps, in order:
      1. strip leading/trailing whitespace,
      2. apply Unicode NFKC normalization (folds compatibility forms),
      3. drop every character for which ``str.isprintable()`` is false
         (control and format characters such as zero-width spaces).

    Case is left untouched so the result can still be used as a path component.
    """
    normalized = unicodedata.normalize('NFKC', filename.strip())
    printable_chars = [ch for ch in normalized if ch.isprintable()]
    return ''.join(printable_chars)


def get_apify_limit(apify_key):
    """Return the ``monthlyUsageUsd`` figure Apify reports for ``apify_key``.

    Queries ``GET /v2/users/me/limits`` with the key as a bearer token and
    returns ``data.current.monthlyUsageUsd`` from the JSON response.

    Args:
        apify_key: Apify API token (string).

    Returns:
        The value stored under ``data -> current -> monthlyUsageUsd``.

    Raises:
        requests.HTTPError: if Apify answers with a 4xx/5xx status (e.g. an
            invalid token), instead of failing later with an opaque KeyError.
        requests.RequestException: on connection problems or timeout.
    """
    url = "https://api.apify.com/v2/users/me/limits"
    headers = {
        'Accept': 'application/json',
        'Authorization': "Bearer " + apify_key,
    }

    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    return response.json()["data"]["current"]["monthlyUsageUsd"]
def get_average_statistics(profile_data):
    """Return per-post averages computed from a scraped profile.

    Args:
        profile_data: profile dict loaded from JSON (or ``None``). Reads
            ``latestPosts`` (list of post dicts with ``likesCount``,
            ``commentsCount``, ``viewsCount``) and ``followersCount``.

    Returns:
        Tuple ``(avg_likes, avg_comments, avg_views, avg_engagement_rate)``.
        Each entry is ``None`` when it cannot be computed: all four when
        ``profile_data`` is ``None`` or there are no posts, and the
        engagement rate when the follower count is not positive.
        Engagement rate per post is ``100 * (likes + comments) / followers``.
    """
    if profile_data is None:
        return None, None, None, None

    def _count(post, key):
        # ``dict.get(key, 0)`` only covers a *missing* key; a JSON ``null``
        # still comes back as None and would break ``sum``. Treat both as 0.
        return post.get(key) or 0

    def _mean(values):
        return sum(values) / len(values) if values else None

    posts = profile_data.get("latestPosts") or []
    followers = profile_data.get("followersCount") or 0

    likes = [_count(post, "likesCount") for post in posts]
    comments = [_count(post, "commentsCount") for post in posts]
    views = [_count(post, "viewsCount") for post in posts]

    engagement = []
    if followers > 0:
        engagement = [
            100 * (like_count + comment_count) / followers
            for like_count, comment_count in zip(likes, comments)
        ]

    return _mean(likes), _mean(comments), _mean(views), _mean(engagement)

def process_collaborators(output_base_dir):
    """Scrape profile JSON and profile photo for every collaborator folder.

    For each non-hidden entry in ``<output_base_dir>/collaborators`` that does
    not already have both ``profile.json`` and ``profile.jpg``, runs the Apify
    actor ``dSCLg0C3YEZ83HzYX`` with the folder name as the username, stores
    the first returned item as ``profile.json`` and downloads the image at
    ``profilePicUrl`` (if any) to ``profile.jpg``.

    Apify tokens are read one per line from ``apify_tokens.txt`` in the
    current working directory; the first token whose reported monthly usage
    is ``<= 4`` is used. If none qualifies, a message is printed and the
    function returns without scraping.

    Args:
        output_base_dir: directory containing the ``collaborators`` folder.
    """
    collaborators_root = os.path.join(output_base_dir, "collaborators")
    # Skipping dot-files also covers '.DS_Store'.
    collaborators_list = [f for f in os.listdir(collaborators_root) if not f.startswith('.')]

    with open("apify_tokens.txt", 'r') as file:
        # One token per line; blank lines are ignored.
        apify_keys = [line.strip() for line in file if line.strip()]

    # Pick the first key whose monthly usage is still <= 4. The generator is
    # lazy, so keys after the first match are not queried.
    # NOTE(review): the message below talks about "$1" while the check uses 4 —
    # confirm which threshold is intended.
    current_apify_key = next((key for key in apify_keys if get_apify_limit(key) <= 4), None)
    if current_apify_key is None:
        print("No key with usage limit more than $1 found")
        return

    # The client does not depend on the collaborator, so build it once.
    client = ApifyClient(current_apify_key)

    for c in collaborators_list:
        profile_path = os.path.join(collaborators_root, c, "profile.json")
        photo_path = os.path.join(collaborators_root, c, "profile.jpg")

        print("Processing collaborator...", c)
        if os.path.exists(profile_path) and os.path.exists(photo_path):
            print("Already processed, skipping...",c)
            continue

        # Run the Actor and wait for it to finish
        run = client.actor("dSCLg0C3YEZ83HzYX").call(run_input={"usernames": [c]})

        # Only the first dataset item is needed. Fetch it *before* opening the
        # output file so an empty dataset does not leave an empty (invalid)
        # profile.json behind.
        item = next(iter(client.dataset(run["defaultDatasetId"]).iterate_items()), None)
        if item is None:
            print(f"No profile data returned for {c}, skipping...")
            continue

        with open(profile_path, 'w') as json_file:
            json.dump(item, json_file, indent=4)

        pic_url = item.get('profilePicUrl')
        if not pic_url:
            continue

        # Best effort: a failed photo download must not abort the whole run.
        try:
            response = requests.get(pic_url, timeout=30)
            if response.status_code == 200:
                with open(photo_path, 'wb') as photo_file:
                    photo_file.write(response.content)
                print(f"Profile photo saved for {c}")
            else:
                print(f"Failed to download profile photo for {c}: HTTP {response.status_code}")
        except (requests.RequestException, OSError) as e:
            print(f"Error downloading profile photo for {c}: {str(e)}")

def get_deep_face_output(image_path):
    """Run DeepFace on an image and report whether a confident face was found.

    DeepFace is asked for the ``age``, ``gender``, ``race`` and ``emotion``
    actions using the ``retinaface`` detector with ``enforce_detection=False``.
    Only the first entry of the returned list is inspected; the fields read
    from it are ``face_confidence``, ``age``, ``dominant_gender``,
    ``dominant_race``, ``race`` (a label -> score mapping) and
    ``dominant_emotion``.

    Args:
        image_path: path to the image

    Returns:
        A 6-tuple ``(is_human, age, gender, dominant_race,
        second_dominant_race, dominant_emotion)``.
        ``is_human`` is ``True`` only when the result is a list whose first
        face has ``face_confidence >= 0.5``; otherwise the tuple is
        ``(False, None, None, None, None, None)``.
        ``second_dominant_race`` is the label with the second-highest score
        in the ``race`` mapping.
    """
    print("Processing deep face on", image_path)

    no_face = (False, None, None, None, None, None)

    analysis = DeepFace.analyze(
        img_path=image_path,
        actions=['age', 'gender', 'race', 'emotion'],
        enforce_detection=False,
        detector_backend="retinaface",
    )

    if not isinstance(analysis, list):
        return no_face

    face = analysis[0]
    if face['face_confidence'] < 0.5:
        return no_face

    age = face['age']
    gender = face['dominant_gender']
    dominant_race = face['dominant_race']
    # Rank race labels by score, highest first, and take the runner-up.
    ranked_races = sorted(face['race'].items(), key=lambda pair: pair[1], reverse=True)
    second_dominant_race = ranked_races[1][0]
    dominant_emotion = face['dominant_emotion']

    return True, age, gender, dominant_race, second_dominant_race, dominant_emotion

def create_folders_for_handles(geography):
    """Create one collaborator folder per username listed in the influencer JSON.

    Reads ``username-filter.influencer_data.json`` from the current working
    directory (a JSON list of objects with a ``username`` key) and makes sure
    ``./data/<geography>/collaborators/<username>`` exists for each entry.

    Errors (missing file, invalid JSON, anything else) are reported on stdout
    and not re-raised.

    Args:
        geography: name of the geography sub-folder under ``./data``.
    """
    # Path to the JSON file and collaborators directory
    json_path = "username-filter.influencer_data.json"
    # f-string is required here: without it the folders were created under a
    # directory literally named "{geography}".
    collaborators_dir = f"./data/{geography}/collaborators"

    # Read the JSON file
    try:
        with open(json_path, 'r') as file:
            data = json.load(file)

        # Loop through each username in the JSON
        for profile in data:
            username = profile["username"]
            folder_path = os.path.join(collaborators_dir, username)

            # Create folder if it doesn't exist
            if not os.path.exists(folder_path):
                os.makedirs(folder_path)
                print(f"Created folder for {username}")
            else:
                print(f"Folder for {username} already exists")
    except FileNotFoundError:
        print(f"Error: {json_path} not found")
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_path}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

def save_profile_details(handle, is_human, age, gender, dominant_race, second_dominant_race, dominant_emotion,followersCount,geography, profile_data=None):
    """Derive the tier for a profile and persist the AI-derived details as JSON.

    Tier rules for humans, by follower count:
        <= 1000 -> "influencer", < 10k -> "nano", < 100k -> "micro",
        < 500k -> "macro", < 1M -> "mega", otherwise "celebrity".
    If the biography contains "ugc" (case-insensitive), "ugc" is prepended,
    e.g. "ugc,nano". Non-humans get tier "brand".

    The details are written to
    ``data/<geography>/collaborators/<handle>/profile_details.json``.
    NOTE(review): the driver script looks for ``ai_details.json`` when
    deciding whether a creator was already processed — confirm which file
    name is intended.

    Args:
        handle: collaborator folder name.
        is_human: whether the profile was classified as a human.
        age, gender, dominant_race, second_dominant_race, dominant_emotion:
            values stored verbatim in the output JSON.
        followersCount: follower count; ``None`` is treated as 0.
        geography: geography sub-folder under ``data``.
        profile_data: optional profile dict used for the biography check.
            When omitted, it is loaded from the handle's ``profile.json``
            (previously the function silently depended on a global named
            ``profile_data``).

    Returns:
        Tuple ``(tier, human_or_brand)``.
    """
    profile_dir = os.path.join("data", geography, "collaborators", handle)

    if is_human:
        human_or_brand = "human"
        tier = "influencer"
        followers = followersCount or 0  # JSON null -> 0

        if followers > 1000:
            if followers < 10000:
                tier = "nano"
            elif followers < 100000:
                tier = "micro"
            elif followers < 500000:
                tier = "macro"
            elif followers < 1000000:
                tier = "mega"
            else:
                tier = "celebrity"

        if profile_data is None:
            # Fall back to the profile stored on disk for this handle.
            try:
                with open(os.path.join(profile_dir, "profile.json"), "r") as f:
                    profile_data = json.load(f)
            except (FileNotFoundError, json.JSONDecodeError):
                profile_data = {}

        # check if UGC creator
        biography = profile_data.get("biography") or ""
        if "ugc" in biography.lower():
            tier = "ugc," + tier
    else:
        tier = "brand"
        human_or_brand = "brand"

    # De-duplicate labels while keeping their order (a set would make the
    # order of e.g. "ugc,nano" vary between runs).
    tier = ",".join(dict.fromkeys(tier.split(",")))

    profile_details_ai = {
        "tier": tier,
        "human_or_brand": human_or_brand,
        "age": age,
        "gender": gender,
        "dominant_race": dominant_race,
        "second_dominant_race": second_dominant_race,
        "dominant_emotion": dominant_emotion
    }

    # save the json
    os.makedirs(profile_dir, exist_ok=True)
    with open(os.path.join(profile_dir, "profile_details.json"), "w") as f:
        json.dump(profile_details_ai, f)

    return tier, human_or_brand
    
def _parse_human_gender_reply(reply_text):
    """Extract ``(is_human, gender)`` from the model's complete reply text."""
    lowered = reply_text.lower()

    # Prefer the explicit JSON fields; this also works if the model echoes the
    # "// ..." comments from the example, which would break a strict JSON parse.
    human_match = re.search(r'"is_human"\s*:\s*(true|false)', lowered)
    if human_match:
        is_human = human_match.group(1) == "true"
    else:
        is_human = "true" in lowered

    gender_labels = {"female": "Woman", "male": "Man"}
    gender_match = re.search(r'"gender"\s*:\s*"([a-z]+)"', lowered)
    if gender_match:
        # "undetermined" (or anything unexpected) maps to None.
        gender = gender_labels.get(gender_match.group(1))
    elif "female" in lowered:
        gender = "Woman"
    elif "male" in lowered:
        gender = "Man"
    else:
        gender = None

    return is_human, gender


def human_or_brand_from_name_bio(handle,geography):
    """Ask the Sutra model whether a profile is a human and, if so, its likely gender.

    Loads ``data/<geography>/collaborators/<handle>/profile.json``, sends the
    handle, ``fullName`` and ``biography`` to the ``sutra-v2`` chat model and
    interprets the reply.

    The streamed reply is concatenated before it is interpreted: matching
    each chunk separately misreads tokens that are split across chunks
    (e.g. "FE" + "MALE" was read as male, and a split "true" was missed).

    Args:
        handle: collaborator folder name / username.
        geography: geography sub-folder under ``data``.

    Returns:
        Tuple ``(is_human, gender)`` where ``is_human`` is a bool and
        ``gender`` is ``"Woman"``, ``"Man"`` or ``None``.
    """
    # NOTE(review): security — this API key is committed in source. The
    # environment variable takes precedence; rotate the key and drop the
    # literal fallback once SUTRA_API_KEY is configured everywhere.
    SUTRA_API_KEY = os.environ.get(
        "SUTRA_API_KEY",
        "su_huzopoC8sVp556LWODhHpjb0g8K_ArF5q_KSXIKB8Ns0Mjk251UpydDgp9BU",
    )
    url = 'https://api.two.ai/v2'

    client = OpenAI(base_url=url, api_key=SUTRA_API_KEY)
    # load the profile data
    profile_path = os.path.join("data",geography, "collaborators", handle, "profile.json")
    with open(profile_path, "r") as f:
        profile_data = json.load(f)

    name = profile_data.get("fullName","")
    bio = profile_data.get("biography","")

    prompt = f"""
        You are an expert social media profile analyst, specializing in identifying genuine human accounts and inferring basic demographic information from limited textual data.
        Your task is to analyze the provided Instagram profile details to determine if it belongs to a real human, and if so, to infer the most likely gender.

        **Profile Data:**
        - **Username:** "{handle}"
        - **Display Name:** "{name}"
        - **Bio Text:** "{bio}"

        **Analysis Steps:**
        1.  **Human or Not Classification:** Assess the username, display name, and especially the bio text.
            * **Look for Human Signals:** Personal interests, specific life roles (e.g., student, artist, professional), expressive language, varied and non-generic descriptions, absence of overt sales pitches or spam.
            * **Look for Not Human Signals:** Generic promotional language, cryptocurrency offers, "DM to earn," highly repetitive phrases, suspicious external links not typical for a personal profile, lack of personal context suggesting an automated, spam, or otherwise non-personal account.
            * **Prioritize Bio:** The bio is often the strongest indicator.
        2.  **Gender Inference (if Human):** If the profile is confidently classified as human, attempt to infer the most likely gender (MALE, FEMALE, or UNDETERMINED).
            * **Primary Indicator:** Rely heavily on common gender associations with the Display Name and Username.
            * **Secondary Indicator (Subtle):** Very subtle linguistic cues in the bio (use with extreme caution, acknowledge inherent unreliability).
            * **Default:** If there is no strong indicator, or if the name is gender-neutral/unknown, default to "UNDETERMINED".
            * **Crucial Caveat:** Acknowledge that gender inference from text alone is limited, potentially inaccurate, and does not account for all gender identities.
        3.  **Format Output:** Compile the findings into the specified JSON object.

        **Output Format:**
        Your output MUST be a JSON object with the exact keys specified below. Values for each key must strictly adhere to the defined types.
        Below is an example of the output format:
        ```json
        {{
            "is_human": true,   // boolean: true if human, false if not human
            "gender": "UNDETERMINED" // string: "MALE", "FEMALE", or "UNDETERMINED"
        }}
        ```

        **Guidelines for Each Key:**

        * **is_human:**
            * `true`: The profile exhibits strong signs of being a genuine human user.
            * `false`: The profile exhibits strong signs of being an automated, spam, or otherwise not human account.
        * **gender:**
            * `"MALE"`: Strongly inferred to be male based on common name associations.
            * `"FEMALE"`: Strongly inferred to be female based on common name associations.
            * `"UNDETERMINED"`: No strong gender inference possible from the text, name is gender-neutral, or the profile is classified as not human.

        Provide only the JSON output. Do NOT include any other conversational text or explanations.
        """

    stream = client.chat.completions.create(model='sutra-v2',
            messages = [{"role": "user", "content": prompt}],
            max_tokens=1024,
            temperature=0,
            stream=True
    )

    # Collect the whole reply first; chunks can cut a word in half.
    reply_parts = []
    for chunk in stream:
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            reply_parts.append(content)

    return _parse_human_gender_reply("".join(reply_parts))



    
    
if __name__ == "__main__":
    # Driver script: walks every collaborator folder for one geography,
    # makes sure its profile data exists, classifies it (human/brand, tier,
    # age, gender), and finally writes one row per creator to an Excel file.

    # Earlier pipeline stages, currently disabled:
    # print("Creating profile folders for non-existent ones..")
    # create_folders_for_handles()

    # print("Scraping data...")
    # process_collaborators("./data")

    geography = "india"
    # geography = "us_canada"
    folder_path = os.path.join("data",geography,"collaborators")

    # Column order of the DataFrame / Excel sheet. Every row appended below
    # must list its values in exactly this order.
    fields = [
    "username",
    "full_name",
    "biography",
    "followers_count",
    "follows_count",
    "tier",
    "human_or_brand",
    "age",
    "gender",
    "avg_likes",
    "avg_comments",
    "avg_views",
    "avg_engagement_rate",
    ]

    # create a data frame
    df = pd.DataFrame(columns=fields)
    
    processed_files = 0
    # NOTE(review): `count` is never read or updated below.
    count = 0
    # Folder names are cleaned, de-duplicated and sorted so the run order is stable.
    handle_list = os.listdir(folder_path)
    handle_list = [clean_filename(handle) for handle in handle_list]
    handle_list=list(set(handle_list))
    handle_list.sort()
    # NOTE(review): this total still includes entries that are skipped below
    # (e.g. .DS_Store), so the progress message can overstate the total.
    total_files = len(handle_list)
    added_handles = set()
    for creator in handle_list:
        print("################################################")
        # skip if .DS_Store
        # NOTE(review): the name is lower-cased before it is used to build
        # paths; on a case-sensitive filesystem a folder with upper-case
        # characters would not be found — confirm folder names are lower-case.
        creator = creator.strip().lower()
        if creator == ".DS_Store".lower() or creator in added_handles:
            continue
        processed_files += 1
        
        added_handles.add(creator)    
        print("Processing creator number", processed_files, "out of total", total_files)
    
        # get the full path of the files
        profile_details_path = os.path.join(folder_path, creator, "profile.json")
        # NOTE(review): save_profile_details in this file writes
        # "profile_details.json", while the cache checked here is
        # "ai_details.json" — confirm which file name is intended.
        profile_details_path_ai = os.path.join(folder_path, creator, "ai_details.json")
        profile_pic_path = os.path.join(folder_path, creator, "profile.jpg")
        
        if not os.path.exists(profile_pic_path):
            # we check only on profile pic - as profile.json gets saved even when incorrect handle
            flag = False
            print("Creator data not present", creator, " trying to download...")
            # call apify to download it
            process_single_collaborator(creator)
            if os.path.exists(profile_pic_path):
                flag = True
            if not flag:
                # Still no picture after the download attempt: treat the
                # handle as invalid and remove its whole folder from disk.
                print("Incorrect handle, deleting it", creator)
                shutil.rmtree(os.path.join(folder_path, creator))

                continue


        # load the json file
        # NOTE(review): only profile.jpg was checked above; this open() raises
        # if profile.json is missing.
        with open(profile_details_path, "r") as f:
            profile_data = json.load(f)

        # get the fields
        username = profile_data.get("username","")
        full_name = profile_data.get("fullName","")
        biography = profile_data.get("biography","")
        followers_count = profile_data.get("followersCount",0)
        follows_count = profile_data.get("followsCount",0)
        
        # Fast path: AI details were already computed on a previous run, so
        # reuse them, refresh the tier, append the row and move on.
        if os.path.exists(profile_details_path_ai):
            print("Already processed, adding to DF, continue ... ")
            # load the info
            with open(profile_details_path_ai, "r") as f:
                profile_details_ai = json.load(f)
            tier = profile_details_ai.get("tier", "")

            # check if UGC creator
            ## first check if human, and then check if bio has ugc mentioned
            if profile_details_ai.get("human_or_brand", "") == "human":
                if "ugc" in profile_data.get("biography", "").lower():
                    tier = "ugc," + tier
            # Split by comma and remove duplicates - needed in case the code is
            # rerun, otherwise "ugc" would be prepended again on every run.
            # NOTE(review): a set does not keep order, so the order of the tier
            # labels in the joined string can differ between runs.
            tier = ",".join(set(tier.split(",")))
            human_or_brand = profile_details_ai.get("human_or_brand", "")
            age = profile_details_ai.get("age", None)
            gender = profile_details_ai.get("gender", None)

            # get average statistics
            avg_likes, avg_comments, avg_views, avg_engagement_rate = get_average_statistics(profile_data)

            # Values must follow the order of `fields`.
            df.loc[len(df)] = [
                username,
                full_name,
                biography,
                followers_count,
                follows_count,
                tier,
                human_or_brand,
                age,
                gender,
                avg_likes,
                avg_comments,
                avg_views,
                avg_engagement_rate

            ]
            
            profile_details_ai["tier"] = tier

            # Persist the refreshed tier back to the cached details file.
            with open(profile_details_path_ai, "w") as f:
                json.dump(profile_details_ai, f)
            continue
            

        
        

        # Slow path: no cached AI details, so classify the profile now.
        is_human_namebio = False
        is_human_profilepic = False
        age = None
        gender = None
        dominant_race = None
        second_dominant_race = None
        dominant_emotion = None
        avg_likes = None
        avg_comments = None
        avg_views = None
        avg_engagement_rate = None
        print("Running deepface on profile pic", creator)

        is_human_profilepic, age, gender, dominant_race, second_dominant_race, dominant_emotion = get_deep_face_output(os.path.join("./data/",geography,"collaborators", creator, "profile.jpg"))
        
        # No confident face in the picture: fall back to the name/bio based
        # classifier, which also supplies the gender.
        # NOTE(review): this passes `username` from profile.json rather than
        # the folder name `creator`; the callee uses it to build the profile
        # path — confirm the two always match.
        if not is_human_profilepic:
            is_human_namebio, gender = human_or_brand_from_name_bio(username,geography)

        # A profile counts as human if either classifier says so.
        tier, human_or_brand = save_profile_details(creator, is_human_profilepic or is_human_namebio, age, gender, dominant_race, second_dominant_race, dominant_emotion,profile_data.get("followersCount",0),geography)

        # get average statistics
        avg_likes, avg_comments, avg_views, avg_engagement_rate = get_average_statistics(profile_data)
        # add the row to the dataframe (values in the order of `fields`)
        df.loc[len(df)] = [
            username,
            full_name,
            biography,
            followers_count,
            follows_count,
            tier,
            human_or_brand,
            age,
            gender,
            avg_likes,
            avg_comments,
            avg_views,
            avg_engagement_rate
        ]
        print("################################################")

    # write to excel file: header in row 1, data starting at row 2
    wb = Workbook()
    ws = wb.active
    for col, field in enumerate(fields):
        ws.cell(row=1, column=col+1).value = field

    for index, row in df.iterrows():
        for col, val in enumerate(row):
            # `index` is the 0-based DataFrame row label; openpyxl rows and
            # columns are 1-based and row 1 holds the header, hence +2 / +1.
            ws.cell(row=index+2, column=col+1).value = val

    # while saving, add the date YYYY-MM-DD

    current_time = datetime.now().strftime("%Y-%m-%d")
    wb.save(f"creators_{geography}_{current_time}.xlsx")

ModuleNotFoundError: No module named 'utils'

In [None]:
import os
import json
from openai import OpenAI
from datetime import datetime

# Geography label used in the name of the output file.
GEOGRAPHY = "india"
# Directory holding one sub-folder per handle, each with a profile.json.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
BASE_DIR = "/home/yash-sisodia/face-detection/collaborators"

# Shared OpenAI-compatible client pointed at the Sutra endpoint.
# NOTE(review): security — this API key is committed in source. The
# SUTRA_API_KEY environment variable takes precedence; rotate the key and
# drop the literal fallback once the variable is configured.
client = OpenAI(
    base_url='https://api.two.ai/v2',
    api_key=os.environ.get(
        "SUTRA_API_KEY",
        'sutra_gZTZZ2jcFUpLrXFsUfUajWbSUCgYhcrG6ypJuJTv27wKBUcGMwqGxP2Adgyj',
    ),
)

def classify_by_bio(handle, profile_path):
    """Ask the model whether the profile at ``profile_path`` is a human.

    Reads ``fullName`` and ``biography`` from the profile JSON, sends them to
    the ``sutra-v2`` model via the module-level ``client`` and interprets the
    answer.

    The streamed answer is concatenated before it is inspected: stream chunks
    may carry ``content=None`` or an empty ``choices`` list (both crashed the
    per-chunk check), and a word may be split across chunks.

    Args:
        handle: profile handle (not used in the prompt; kept for callers).
        profile_path: path to the profile's ``profile.json``.

    Returns:
        ``True`` if "true" is the first verdict found in the reply,
        ``False`` if "false" comes first or neither word appears.
    """
    with open(profile_path, "r") as f:
        profile_data = json.load(f)

    name = profile_data.get("fullName", "")
    bio = profile_data.get("biography", "")

    prompt = f'''
    You are a social media analyst. Given a display name and bio, determine if this is a real human or a brand.

    Name: {name}
    Bio: {bio}

    Only return true (if human) or false (if not human).
    '''

    stream = client.chat.completions.create(
        model='sutra-v2',
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=10,
        stream=True
    )

    reply_parts = []
    for chunk in stream:
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:
            reply_parts.append(content)

    reply = "".join(reply_parts).lower()
    true_at = reply.find("true")
    false_at = reply.find("false")
    if true_at == -1:
        return False
    # Whichever verdict appears first wins, like the original early return.
    return false_at == -1 or true_at < false_at

def main():
    """Classify a fixed list of creator and brand handles by bio and save the result.

    For every handle in the hard-coded lists below, looks for
    ``<BASE_DIR>/<handle>/profile.json``, runs ``classify_by_bio`` on it and
    records the boolean result. Handles without a profile file are reported
    and skipped; errors for a single handle are printed and do not stop the
    run.

    The results are written to ``<GEOGRAPHY>_bio_classification_<YYYY-MM-DD>.json``
    in a ``finally`` block, so whatever was classified before an interruption
    (e.g. Ctrl+C in the middle of a long run) is still saved.
    """
    output = {}

    creator_handles = [
        "_.dracarys.x_",
        "_.harshitha_.gowda._",
        "_.heartwants._",
        "_.hett",
        "_.keshavi._",
        "_.kirranchauhan._",
        "_.life_in_pastel._",
        "_.manisha_kulal",
        "_.mile.y",
        "_.minney__06",
        "_.miss_komal_.1026",
        "_.nickyy75_",
        "_.nutrihealth",
        "_.palakvasudeva._",
        "_.phenomenail",
        "_.poojjja._",
        "_.priyanshiii01",
        "_.ranideepa._",
        "_.samridhii_",
        "_.shikhachaudhary._",
        "_.shobhomita",
        "_.shrush_",
        "_.srilakshmi.__",
        "_.sudeshnax.__",
        "_.tanisha._.22._",
        "_.vandana._03",
        "_3.4ooo",
        "__.aaddyyyyyyyy",
        "__.aishi_",
        "__.ambiverttt",
        "__.anuraaag",
        "__.mitara.b.__",
        "__.prachi.__26",
        "__.sannss",
        "__.shammu_zz",
        "__.soulzy",
        "__.tashaa.__",
        "___.ishpreet",
        "____.jagriti.___",
        "______jasmine1_______",
        "_____keppy_____",
        "_____tequila_girl",
        "____sanjana___boro____",
        "____vartikaa___",
        "___aamu___05",
        "___antara__ghosh___",
        "___chinmoy_bora",
        "___dhanu07___",
        "___dubey___ji___",
        "___jasmine_05",
        "___jelly_bean___",
        "___kamakshiiiiii",
        "___malemnganbiii___",
        "___maxumilian___",
        "___mikku___",
        "___nikhat____",
        "___notyourcupoftea___",
        "___riyuhhh___",
        "___sadah____",
        "___simran__official___",
        "___srn_9779_",
        "___tannu20",
        "___tripathi__",
        "___twinkling___",
        "___whatarethose",
        "__akshitagupta",
        "__ankitaa._____",
        "__apurvapawar_",
        "__ar_yaaa__",
        "__barbie_grl",
        "__baruah__96",
        "__beauty__squad__",
        "__bellabeautyy",
        "__bhumi_thakur___",
        "__bint.abdullah__",
        "__brownie_love__",
        "__chachiii_",
        "__chikka",
        "__clematiss__",
        "__darshan____0024",
        "__devlina__",
        "__dr__.aastha___.yadav",
        "__embxx",
        "__estrellaaaa.__",
        "__hussainujjain",
        "__iamleeba._",
        "__ishannnnn._",
        "__ishikasachdeva__",
        "__ishsh__",
        "__jenny_pereira__",
        "__jess_mavalia__",
        "__jessferns__43",
        "__kajal.bisht__",
        "__kalitapriyamr__",
        "__kaur___simran",
        "__khushiiiii__3",
        "__kripali",
        "__lil.jannat",
        "__lucy.2.0__",
        "__ma.dh.u___"
    ]

    brand_handles = [
        "mynykaa",
        "nykaanaturals",
        "nykaaskinrx",
        "nykaacosmetics",
        "wowskinscienceindia",
        "mamaearth.in",
        "letspurplle",
        "asabeautyindia",
        "trysugar",
        "nathabit.in",
        "discover.pilgrim",
        "vlccin",
        "forestessentials",
        "faebeautyofficial",
        "thebodyshopindia",
        "lovecolorbar",
        "biotique_world",
        "maccosmeticsindia",
        "revlon_india",
        "bobbibrownindia",
        "himalaya_facecare",
        "plumgoodness",
        "plumbodylovin",
        "disguisecosmetics",
        "myglamm",
        "coloressenceofficial",
        "rubys.organics",
        "elle18_india",
        "livon",
        "herbalessencesindia",
        "thebeautycoindia",
        "loverecode",
        "facescanada",
        "clinique_in",
        "suroskiebeauty",
        "paccosmetic",
        "litcosmeticsindia",
        "beauty_secret_highend_makeup",
        "biodermaindia",
        "mymakeupstory_india",
        "londonprimeindia",
        "charactercosmeticsindia",
        "forever52india",
        "keautybeauty.in",
        "shopaarel",
        "justgoldindia",
        "lashupindia",
        "saturnhealth.ghc",
        "skinkraftshop",
        "marshealth.ghc",
        "beromtnails",
        "blissible_cosmetics",
        "prishebeauty",
        "kindedbeauty",
        "abelia.cares",
        "farmbeautyofficial",
        "ellement.company",
        "skinpassword.in",
        "kaybykatrina",
        "82e.official",
        "arata.in",
        "herbyangelofficial",
        "nutriglowcosmetics",
        "oh_ind",
        "kapiva_official",
        "mcaffeineofficial",
        "perforaofficial",
        "sephora_india",
        "recodeeverydaymakeup",
        "officialswissbeauty",
        "mycosiq",
        "suganda.co",
        "myskinq",
        "auliglow",
        "theformularx",
        "himairag",
        "dermabayskincare",
        "beardo.official",
        "themancompany",
        "reginaldmen.grooming",
        "bombayshavingcompany",
        "mybombae__",
        "yaanman.official",
        "allman.in",
        "dollarshaveclub",
        "oldspice",
        "oldspiceindia",
        "oldspicehair",
        "oldspiceuk",
        "foxtaleskin",
        "ohsogaga",
        "riseformen",
        "dotandkey.skincare",
        "gush.beauty",
        "try.moody",
        "moxiebeautyofficial",
        "poshanskincare",
        "offduty.india",
        "emprallofficial",
        "littlebox.india",
        "tokyo_talkies",
        "freakinsindia",
        "newme.asia",
        "uptownie101",
        "crayy.heads",
        "bonkers.corner",
        "streetstylestoreofficial",
        "maincharacter_india",
        "manyavar",
        "izfworld",
        "virgio.official",
        "staywrogn",
        "theindiangarageco_",
        "ajiolife",
        "fabindiaofficial",
        "bibaindia",
        "allensollyindia",
        "wforwoman",
        "raymond_the_complete_man",
        "spykarofficial",
        "siyaramsindia",
        "kalkifashion",
        "bunaai",
        "libasindia",
        "shopmulmul",
        "houseofmasaba",
        "midsummer.india",
        "endlessummer.shop",
        "shopaamili",
        "layrrd",
        "kicaactive",
        "fuaarkofficial",
        "zymratwear",
        "ovrtrn",
        "birdeye.india",
        "hrxbrand",
        "chapter2drip"
    ]

    all_handles = creator_handles + brand_handles

    try:
        for handle in all_handles:
            profile_path = os.path.join(BASE_DIR, handle, "profile.json")
            if not os.path.exists(profile_path):
                print(f"{handle} profile.json not found.")
                continue

            try:
                print(f"Classifying {handle}")
                is_human = classify_by_bio(handle, profile_path)
                output[handle] = is_human
            except Exception as e:
                # One failing handle must not abort the whole batch.
                print(f"Error in {handle}: {e}")
    finally:
        # Runs on normal completion and on interruption alike, so partial
        # results are never lost; the interruption itself still propagates.
        date = datetime.now().strftime("%Y-%m-%d")
        with open(f"{GEOGRAPHY}_bio_classification_{date}.json", "w") as f:
            json.dump(output, f, indent=2)

# Run the bio classification only when executed directly (script or notebook
# cell), not when this code is imported.
if __name__ == "__main__":
    main()

Classifying _.dracarys.x_
Classifying _.harshitha_.gowda._
Classifying _.heartwants._
Classifying _.hett
Classifying _.keshavi._
Classifying _.kirranchauhan._
Classifying _.life_in_pastel._
Classifying _.manisha_kulal
Classifying _.mile.y
Classifying _.minney__06
Classifying _.miss_komal_.1026
Classifying _.nickyy75_
Classifying _.nutrihealth
Classifying _.palakvasudeva._
Classifying _.phenomenail
Classifying _.poojjja._
Classifying _.priyanshiii01
Classifying _.ranideepa._
Classifying _.samridhii_
Classifying _.shikhachaudhary._
Classifying _.shobhomita
Classifying _.shrush_
Classifying _.srilakshmi.__
Classifying _.sudeshnax.__
Classifying _.tanisha._.22._


Exception ignored in: <generator object PoolByteStream.__iter__ at 0x7d85c4389540>
Traceback (most recent call last):
  File "/home/yash-sisodia/face-detection/venv/lib/python3.12/site-packages/httpcore/_sync/connection_pool.py", line 406, in __iter__
    self.close()
  File "/home/yash-sisodia/face-detection/venv/lib/python3.12/site-packages/httpcore/_sync/connection_pool.py", line 416, in close
    with self._pool._optional_thread_lock:
  File "/home/yash-sisodia/face-detection/venv/lib/python3.12/site-packages/httpcore/_synchronization.py", line 268, in __enter__
    self._lock.acquire()
KeyboardInterrupt: 
