In [1]:
# Uncomment these lines when running for the first time to install the required packages.  This is mostly required for local development.
# # %pip install openai
# %pip install google-auth
# %pip install google-cloud-bigquery
# %pip install google-cloud-storage
# %pip install lca
# %pip install banjo
# %pip install tqdm
# %pip install oauth2client
# %pip install tqdm.contrib
# %pip install google-generativeai
# %pip install google-genai
# %pip install av

In [2]:
import os
import time
import tempfile
import logging
from datetime import datetime, timedelta, timezone
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

import requests
import pandas as pd
import av
from tqdm import tqdm
from tqdm.contrib.concurrent import thread_map
from functools import partial

from google.cloud import bigquery, storage
from google import genai
from google.api_core.exceptions import TooManyRequests

from banjo import utils
from banjo.utils.shibainu import (
    Classification,
    estimate_run_cost,
    configure_logger,
)

# Import maps viral places classification utils
from utils.constant import get_viral_places_query, place_agg_dict, PLACE_REQUIRED_KEYS, PLACE_LIST_COLS, VIDEO_REQUIRED_KEYS, SELECTED_COLS, STORY_COLS_RENAME
from utils.helper import download_and_upload, topk_by_score_per_place, parse_incident_json_broken, parse_incident_safe, majority_vote, combine_text_list
from utils.prompt import VIDEO_CLASSIFIER_PROMPT, TEXT_CLASSIFIER_PROMPT

# Configure logging: set the shibainu logger to ERROR so lower-severity records
# are suppressed in the notebook output. The call returns the logger, which is
# why the cell echoes its repr.
configure_logger(level=logging.ERROR)

  from .autonotebook import tqdm as notebook_tqdm


<Logger shibainu (ERROR)>

#### Parameters

In [3]:
# --- Query window and scoring ------------------------------------------------
# Date bounds (YYYYMMDD strings) handed to get_viral_places_query.
START_DATE = "20250703"
END_DATE = "20250705"
# Weights forwarded to get_viral_places_query.
# NOTE(review): presumably these blend view count and freshness into the `score`
# column used for top-k selection -- confirm in utils.constant.
VIEW_WEIGHT = 0.6
FRESHNESS_WEIGHT = 0.4

# --- Sampling and concurrency ------------------------------------------------
SAMPLE_PER_PLACE = 5  # k passed to topk_by_score_per_place
MAX_WORKERS = 10      # worker threads used by the classification thread_map

# --- GCS staging location ----------------------------------------------------
BUCKET_NAME = "shiba-inu-temp"
BUCKET_FOLDER = "maps_events_20250720"
client = storage.Client(project="myaigcp")
BUCKET = client.bucket(BUCKET_NAME)

# --- BigQuery save destination -----------------------------------------------
WRITE_PROJECT_ID = "sc-bq-gcs-billingonly"
WRITE_DATASET = "temp_datascience"
WRITE_TABLE_NAME = "maps_viral_places_classification"

# Fully qualified table id: <project>.<dataset>.<table>
DESTINATION = ".".join((WRITE_PROJECT_ID, WRITE_DATASET, WRITE_TABLE_NAME))
# SERVICE_ACCOUNT = 'shiba-inu@sc-product-datascience.iam.gserviceaccount.com'

#### Import Data and Upload Media to GCS

In [4]:
# Step 1: Import data.
# Build the viral-places query for the configured date window and weights,
# then load its result into `df` via banjo's read_gbq wrapper.
query = get_viral_places_query(
    START_DATE,
    END_DATE,
    VIEW_WEIGHT,
    FRESHNESS_WEIGHT,
)
df = utils.gbq.read_gbq(
    query,
    project_id="myaigcp",
    dialect="standard",
    priority="interactive",
)


E0000 00:00:1762470515.783535 1692827 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [5]:
# Step 2: Download and upload media to GCS.
# `download_and_upload` is pre-bound to the target bucket/folder and to the
# columns holding the source URL and the story id, so the pool only has to
# hand it one row dict per call.
worker = partial(
    download_and_upload,
    bucket=BUCKET,
    bucket_name=BUCKET_NAME,
    bucket_folder=BUCKET_FOLDER,
    url_col='media_url',
    id_col='story_snap_id',
)
rows = df.to_dict(orient="records")

# Pool size comes from the shared MAX_WORKERS setting instead of a separate
# hard-coded literal, so concurrency is tuned in one place.
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    # Executor.map yields results in input order, so results[i] belongs to rows[i].
    results = list(tqdm(ex.map(worker, rows), total=len(rows)))

# Write the results back to the original df.
# Positional column assignment: `results` is already in row order, whereas the
# label-based `df.loc[df.index, ...]` fails or misaligns if the index ever
# contains duplicate labels.
# NOTE(review): rows whose download failed (see the 404s in the output) presumably
# get a null gcs_url that the top-k step filters on -- confirm in utils.helper.
df["gcs_url"] = results


  1%|          | 11/2096 [00:02<05:10,  6.72it/s] 

Error processing https://cf-st.sc-cdn.net/d/ozXhUb7dcBDhufHmaObo9.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCKjeocMGSAJQSWABogE3CIoIEiUKIwiR004gATDgAzjWBkABSg4KCUxBSFBLTUlAOhD0A1D8TWgCIgsSACoHSVJaWFNPWZAD_E0%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYZmhyZ2NtaGZlAZfX8aK2AZfX8aHdAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/ozXhUb7dcBDhufHmaObo9.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCKjeocMGSAJQSWABogE3CIoIEiUKIwiR004gATDgAzjWBkABSg4KCUxBSFBLTUlAOhD0A1D8TWgCIgsSACoHSVJaWFNPWZAD_E0%3D&uc=73
Error processing https://cf-st.sc-cdn.net/d/3S9RVR8l0rofUGAnyiogd.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCNTJoMMGSAJQSWABogE4CIoIEiYKJAjuvDwgATDgAzjWBkABSg8KCq4BR0RGOjM_MDgQ9ANQyjloAiILEgAqB0lSWlhTT1mQA8o5&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYY3Z6enBsbXluAZfWz1yJAZfWz1v4AAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/3S9RVR8l0rofUGAnyiogd.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCNTJoMMGSAJQSWABogE4CIoIEiYKJAjuvDwgATDgAzjWBkABSg8KCq4BR0RGOjM_MDgQ9ANQyjloAiILEgAqB0lSWlhTT1m

  7%|▋         | 145/2096 [00:12<04:13,  7.68it/s]

Error processing https://cf-st.sc-cdn.net/d/RwgXDFyGZMAjFpBOsNPvA.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCOzvo8MGSAJQSWABogE3CIoIEiUKIwjYtR4gATDgAzjUBkABSg4KCTwdHB8bHBsZFBD0A1CmRWgCIgsSACoHSVJaWFNPWZADpkU%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYb2F5dmlnZ2l2AZfaB_fvAZfaB_fXAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/RwgXDFyGZMAjFpBOsNPvA.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCOzvo8MGSAJQSWABogE3CIoIEiUKIwjYtR4gATDgAzjUBkABSg4KCTwdHB8bHBsZFBD0A1CmRWgCIgsSACoHSVJaWFNPWZADpkU%3D&uc=73
Error processing https://cf-st.sc-cdn.net/d/CFKyBfxC6UEz2q2hsLE2X.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCOrvo8MGSAJQSWABogE3CIoIEiUKIwjAxCUgATDgAzjUBkABSg4KCTcSGhwdHx4eIhD0A1CuTmgCIgsSACoHSVJaWFNPWZADrk4%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYd3huZnZ6dnpvAZfaB_GLAZfaB_FyAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/CFKyBfxC6UEz2q2hsLE2X.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCOrvo8MGSAJQSWABogE3CIoIEiUKIwjAxCUgATDgAzjUBkABSg4KCTcSGhwdHx4eIhD0A1CuTmgCIgsSACoHSVJaWFNPW

 14%|█▍        | 302/2096 [00:21<02:29, 12.03it/s]

Error processing https://cf-st.sc-cdn.net/d/bsvO8QWgkIRJv6StwhAXq.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCMWCosMGSANQSWABogE4CIoIEiYKJAiagE8gATDgAzjWBkABSg8KCo4BQ109VjU4MC0Q9ANQhk5oAiILEgAqB0lSWlhTT1mQA4ZO&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYZ2V5aHRtb2FjAZfYOE6uAZfYOEiSAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/bsvO8QWgkIRJv6StwhAXq.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCMWCosMGSANQSWABogE4CIoIEiYKJAiagE8gATDgAzjWBkABSg8KCo4BQ109VjU4MC0Q9ANQhk5oAiILEgAqB0lSWlhTT1mQA4ZO&uc=73
Error processing https://cf-st.sc-cdn.net/i/1V7SiPD33SfMBhRf2ng44.1034.IRZXSOY?mo=GlQaFDIBCToBfUIGCL2CosMGSANQSWABogE3CIoIEiUKIwiKjSQgATDgAzjWBkABSg4KCWhQFTM7KyE0NRD0A1CWKWgCIgsSACoHSVJaWFNPWZADlik%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYc256Y3FlcGJxAZfYOFH9AZfYOEicAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/i/1V7SiPD33SfMBhRf2ng44.1034.IRZXSOY?mo=GlQaFDIBCToBfUIGCL2CosMGSANQSWABogE3CIoIEiUKIwiKjSQgATDgAzjWBkABSg4KCWhQFTM7KyE0NRD0A1CWKWgCIgsSACoHSVJaWFNPWZADl

 24%|██▍       | 500/2096 [00:35<02:19, 11.43it/s]

Error processing https://cf-st.sc-cdn.net/d/vcWy95xjym3NZVcXiuObX.1034.IRZXSOY?mo=GlEaFDIBBDoBfUIGCLrSoMMGSAJQSWABogE0CIoIEiIKIAjLthwgATDgAzjUBkABSgsKBoIBTUZMRBD0A1DoFmgCIgsSACoHSVJaWFNPWZAD6BY%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYa25ibHVsY2FxAZfW4JvBAZfW4JuxAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/vcWy95xjym3NZVcXiuObX.1034.IRZXSOY?mo=GlEaFDIBBDoBfUIGCLrSoMMGSAJQSWABogE0CIoIEiIKIAjLthwgATDgAzjUBkABSgsKBoIBTUZMRBD0A1DoFmgCIgsSACoHSVJaWFNPWZAD6BY%3D&uc=73
Error processing https://cf-st.sc-cdn.net/d/OtPSIBPLOewSwOb3gP80S.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCL3SoMMGSAJQSWABogE4CIoIEiYKJAiSrFEgATDgAzjUBkABSg8KCqMBRkpQQTkzOzkQ9ANQrk5oAiILEgAqB0lSWlhTT1mQA65O&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYbnVmdGh2em1kAZfW4JiSAZfW4Jh8AAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/OtPSIBPLOewSwOb3gP80S.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCL3SoMMGSAJQSWABogE4CIoIEiYKJAiSrFEgATDgAzjUBkABSg8KCqMBRkpQQTkzOzkQ9ANQrk5oAiILEgAqB0lSWlhTT1mQA65O&uc

 30%|███       | 637/2096 [00:43<01:35, 15.27it/s]

Error processing https://cf-st.sc-cdn.net/d/DbRER0uuNB2zcw12MqWdQ.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCPL6n8MGSAJQSWABogE3CIoIEiUKIwjLlhYgATDgAzjUBkABSg4KCUAcIyEmKyYrIBD0A1DuI2gCIgsSACoHSVJaWFNPWZAD7iM%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYdWRlcmNyZGNxAZfWNY9BAZfWNY8qAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/DbRER0uuNB2zcw12MqWdQ.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCPL6n8MGSAJQSWABogE3CIoIEiUKIwjLlhYgATDgAzjUBkABSg4KCUAcIyEmKyYrIBD0A1DuI2gCIgsSACoHSVJaWFNPWZAD7iM%3D&uc=73
Error processing https://cf-st.sc-cdn.net/d/qPhdKRUV3EEgLFHiB81t8.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCPT6n8MGSAJQSWABogE3CIoIEiUKIwjViysgATDgAzjUBkABSg4KCVceIicjHiEeHRD0A1CkTmgCIgsSACoHSVJaWFNPWZADpE4%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYdGpwdXZrdmlwAZfWNYv3AZfWNYvcAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/qPhdKRUV3EEgLFHiB81t8.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCPT6n8MGSAJQSWABogE3CIoIEiUKIwjViysgATDgAzjUBkABSg4KCVceIicjHiEeHRD0A1CkTmgCIgsSACoHSVJaWFNPW

 54%|█████▎    | 1124/2096 [01:15<00:46, 21.12it/s]

Error processing https://cf-st.sc-cdn.net/d/RKIuSasBh0FUri4cBKdFa.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCNC4osMGSAJQSWABogE3CIoIEiUKIwiq1T8gATDgAzjWBkABSg4KCUc5NSw3RSs1MRD0A1CQTmgCIgsSACoHSVJaWFNPWZADkE4%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYcHdhZG1rcXpmAZfYohO1AZfYofaBAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/RKIuSasBh0FUri4cBKdFa.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCNC4osMGSAJQSWABogE3CIoIEiUKIwiq1T8gATDgAzjWBkABSg4KCUc5NSw3RSs1MRD0A1CQTmgCIgsSACoHSVJaWFNPWZADkE4%3D&uc=73
Error processing https://cf-st.sc-cdn.net/d/CQavLdhTY75jlzsFly5tO.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCMy4osMGSAJQSWABogE3CIoIEiUKIwikwCIgATDgAzjWBkABSg4KCUxAPDgzNzs6NBD0A1DAJWgCIgsSACoHSVJaWFNPWZADwCU%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYYmNycGN2aHh1AZfYohVDAZfYofaLAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/CQavLdhTY75jlzsFly5tO.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCMy4osMGSAJQSWABogE3CIoIEiUKIwikwCIgATDgAzjWBkABSg4KCUxAPDgzNzs6NBD0A1DAJWgCIgsSACoHSVJaWFNPW

 60%|██████    | 1259/2096 [01:23<00:44, 18.60it/s]

Error processing https://cf-st.sc-cdn.net/d/EB1GYP8W0mbzqfiObChZW.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCMeRpcMGSAJQSWABogE4CIoIEiYKJAiQ6zYgATDgAzjWBkABSg8KCowBVWA9ODs6MjQQ9ANQjDNoAiILEgAqB0lSWlhTT1mQA4wz&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYeHFqbHRvYWZuAZfbQ8CPAZfbQ7usAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/EB1GYP8W0mbzqfiObChZW.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCMeRpcMGSAJQSWABogE4CIoIEiYKJAiQ6zYgATDgAzjWBkABSg8KCowBVWA9ODs6MjQQ9ANQjDNoAiILEgAqB0lSWlhTT1mQA4wz&uc=73


 68%|██████▊   | 1419/2096 [01:32<00:28, 24.07it/s]

Error processing https://cf-st.sc-cdn.net/d/EKPAhlpxnR851dB3YpnqK.1034.IRZXSOY?mo=GlgaFDIBBDoBfUIGCJinjcMGSAJQSWABogE6CIoIEigKJgiS6cgHIAEw4AM41AZAAUoPCgrjAVhORksvOVImEPQDUO79CmgCIgsSACoHSVJaWFNPWZAD7v0K&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYcXpma29tcXFzAZfD-9kzAZfD-9kdAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/EKPAhlpxnR851dB3YpnqK.1034.IRZXSOY?mo=GlgaFDIBBDoBfUIGCJinjcMGSAJQSWABogE6CIoIEigKJgiS6cgHIAEw4AM41AZAAUoPCgrjAVhORksvOVImEPQDUO79CmgCIgsSACoHSVJaWFNPWZAD7v0K&uc=73


 69%|██████▉   | 1448/2096 [01:34<00:37, 17.32it/s]

Error processing https://cf-st.sc-cdn.net/i/s3oc0H4gS9VNDTUywpaXt.1034.IRZXSOY?mo=GlUaFDIBCToBfUIGCLGHpcMGSAJQSWABogE4CIoIEiYKJAiDpE8gATDgAzjWBkABSg8KCo8BQ1xKSTo9OC4Q9ANQzE5oAiILEgAqB0lSWlhTT1mQA8xO&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYbWFpcXd4aW1nAZfbL9HHAZfbL8srAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/i/s3oc0H4gS9VNDTUywpaXt.1034.IRZXSOY?mo=GlUaFDIBCToBfUIGCLGHpcMGSAJQSWABogE4CIoIEiYKJAiDpE8gATDgAzjWBkABSg8KCo8BQ1xKSTo9OC4Q9ANQzE5oAiILEgAqB0lSWlhTT1mQA8xO&uc=73
Error processing https://cf-st.sc-cdn.net/d/i8dGLegyTX84WgbIS1FQC.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCLyHpcMGSANQSWABogE4CIoIEiYKJAi9_04gATDgAzjWBkABSg8KCoUBWExHQUFAMz8Q9ANQkE5oAiILEgAqB0lSWlhTT1mQA5BO&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYd2t2YndiemN5AZfbL9AuAZfbL8shAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/i8dGLegyTX84WgbIS1FQC.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCLyHpcMGSANQSWABogE4CIoIEiYKJAi9_04gATDgAzjWBkABSg8KCoUBWExHQUFAMz8Q9ANQkE5oAiILEgAqB0lSWlhTT1mQA5B

 69%|██████▉   | 1451/2096 [01:36<02:28,  4.34it/s]

Error processing https://cf-st.sc-cdn.net/i/t4EqbZ6anMmqPyhHsiuxY.1034.IRZXSOY?mo=GlQaFDIBCToBfUIGCKqHpcMGSAJQSWABogE3CIoIEiUKIwjUr04gATDgAzjWBkABSg4KCXU9Tzs_PUw8OhD0A1DyTWgCIgsSACoHSVJaWFNPWZAD8k0%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYbm5wdHp6c2J3AZfbL8ygAZfbL8sNAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/i/t4EqbZ6anMmqPyhHsiuxY.1034.IRZXSOY?mo=GlQaFDIBCToBfUIGCKqHpcMGSAJQSWABogE3CIoIEiUKIwjUr04gATDgAzjWBkABSg4KCXU9Tzs_PUw8OhD0A1DyTWgCIgsSACoHSVJaWFNPWZAD8k0%3D&uc=73


 76%|███████▌  | 1587/2096 [01:46<00:32, 15.52it/s]

Error processing https://cf-st.sc-cdn.net/d/vuyy3uHccfHPkyrCoAPki.1034.IRZXSOY?mo=Gk8aFDIBBDoBfUIGCO6ipMMGSAJQSWABogEyCIoIEiAKHgiD1BEgATDgAzjWBkABSgkKBHYwNTUQ9ANQmBFoAiILEgAqB0lSWlhTT1mQA5gR&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYb2VsbmhkaWNuAZfaa62uAZfaa6nkAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/vuyy3uHccfHPkyrCoAPki.1034.IRZXSOY?mo=Gk8aFDIBBDoBfUIGCO6ipMMGSAJQSWABogEyCIoIEiAKHgiD1BEgATDgAzjWBkABSgkKBHYwNTUQ9ANQmBFoAiILEgAqB0lSWlhTT1mQA5gR&uc=73


 79%|███████▊  | 1647/2096 [01:49<00:22, 19.58it/s]

Error processing https://cf-st.sc-cdn.net/d/OeBYXIWe226B3fE9qin2U.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCPH9o8MGSAJQSWABogE3CIoIEiUKIwjJlysgATDgAzjWBkABSg4KCUApSVZKSVtLVRD0A1DAJWgCIgsSACoHSVJaWFNPWZADwCU%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYYWhyd2VrbWlwAZfaI1iqAZfaI1c_AAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/OeBYXIWe226B3fE9qin2U.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCPH9o8MGSAJQSWABogE3CIoIEiUKIwjJlysgATDgAzjWBkABSg4KCUApSVZKSVtLVRD0A1DAJWgCIgsSACoHSVJaWFNPWZADwCU%3D&uc=73


 80%|███████▉  | 1671/2096 [01:51<00:29, 14.42it/s]

Error processing https://cf-st.sc-cdn.net/d/mDRayYHDYPiO7mXY2zAe7.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCJbao8MGSAJQSWABogE4CIoIEiYKJAiZh08gATDgAzjWBkABSg8KCskBOzY7RTk3OTYQ9ANQuE5oAiILEgAqB0lSWlhTT1mQA7hO&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYYnplY3ZxaXNoAZfZ3XnBAZfZ3XR_AAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/mDRayYHDYPiO7mXY2zAe7.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCJbao8MGSAJQSWABogE4CIoIEiYKJAiZh08gATDgAzjWBkABSg8KCskBOzY7RTk3OTYQ9ANQuE5oAiILEgAqB0lSWlhTT1mQA7hO&uc=73
Error processing https://cf-st.sc-cdn.net/d/bAfaZZCicHZA8HUp90Cdu.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCI3ao8MGSAJQSWABogE4CIoIEiYKJAi1jU4gATDgAzjWBkABSg8KCr8BSj0_N0I8MzYQ9ANQ8k1oAiILEgAqB0lSWlhTT1mQA_JN&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYb2FveWdid21nAZfZ3XbXAZfZ3XR1AAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/bAfaZZCicHZA8HUp90Cdu.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCI3ao8MGSAJQSWABogE4CIoIEiYKJAi1jU4gATDgAzjWBkABSg8KCr8BSj0_N0I8MzYQ9ANQ8k1oAiILEgAqB0lSWlhTT1mQA_J

 80%|████████  | 1681/2096 [01:51<00:18, 22.58it/s]

Error processing https://cf-st.sc-cdn.net/d/1ft5dDgKxEVkQ0vdlUtZJ.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCIrXo8MGSAJQSWABogE3CIoIEiUKIwjnny8gATDgAzjWBkABSg4KCXo8QUE8NT4_SxD0A1DCK2gCIgsSACoHSVJaWFNPWZADwis%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYcWRwc2d4aWZ5AZfZ145ZAZfZ14xBAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/1ft5dDgKxEVkQ0vdlUtZJ.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCIrXo8MGSAJQSWABogE3CIoIEiUKIwjnny8gATDgAzjWBkABSg4KCXo8QUE8NT4_SxD0A1DCK2gCIgsSACoHSVJaWFNPWZADwis%3D&uc=73
Error processing https://cf-st.sc-cdn.net/i/6awRpHnldcB4EnPk4vVnT.1034.IRZXSOY?mo=GlUaFDIBCToBfUIGCMvYo8MGSANQSWABogE4CIoIEiYKJAixxksgATDgAzjWBkABSg8KCq4BTkc_QDQ2ODgQ9ANQxEpoAiILEgAqB0lSWlhTT1mQA8RK&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYYmF1eGVyam1oAZfZ2n7wAZfZ2kalAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/i/6awRpHnldcB4EnPk4vVnT.1034.IRZXSOY?mo=GlUaFDIBCToBfUIGCMvYo8MGSANQSWABogE4CIoIEiYKJAixxksgATDgAzjWBkABSg8KCq4BTkc_QDQ2ODgQ9ANQxEpoAiILEgAqB0lSWlhTT1m

 80%|████████  | 1684/2096 [01:52<00:19, 20.63it/s]

Error processing https://cf-st.sc-cdn.net/d/R13HOuVLGKumevZExFQRE.1034.IRZXSOY?mo=GlEaFDIBBDoBfUIGCIDOo8MGSANQSWABogE0CIoIEiIKIAiUjR0gATDgAzjWBkABSgsKBoABSEtLTBD0A1D8FmgCIgsSACoHSVJaWFNPWZAD_BY%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYeXZ1Y2ZxemtxAZfZxd5SAZfZxcOOAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/R13HOuVLGKumevZExFQRE.1034.IRZXSOY?mo=GlEaFDIBBDoBfUIGCIDOo8MGSANQSWABogE0CIoIEiIKIAiUjR0gATDgAzjWBkABSgsKBoABSEtLTBD0A1D8FmgCIgsSACoHSVJaWFNPWZAD_BY%3D&uc=73
Error processing https://cf-st.sc-cdn.net/d/SMXtmJ61wlGwIhx2dc5cy.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCIHOo8MGSAJQSWABogE4CIoIEiYKJAjS3E4gATDgAzjWBkABSg8KCrsBSkA4OTo9NDkQ9ANQ8k1oAiILEgAqB0lSWlhTT1mQA_JN&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYdnluaG16eWFhAZfZxdkoAZfZxcOEAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/SMXtmJ61wlGwIhx2dc5cy.1034.IRZXSOY?mo=GlUaFDIBBDoBfUIGCIHOo8MGSAJQSWABogE4CIoIEiYKJAjS3E4gATDgAzjWBkABSg8KCrsBSkA4OTo9NDkQ9ANQ8k1oAiILEgAqB0lSWlhTT1mQA_JN&uc

 82%|████████▏ | 1716/2096 [01:54<00:27, 13.81it/s]

Error processing https://cf-st.sc-cdn.net/d/SKIC9LVEDMgMRFKPdZcDw.1034.IRZXSOY?mo=Gk0aFDIBBDoBfUIGCKrrosMGSAJQSWABogEwCIoIEh4KHAi6gQYgATDgAzjWBkABSgcKAjYbEPQDUJQKaAIiCxIAKgdJUlpYU09ZkAOUCg%3D%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYd25nZnpxcmF2AZfZBT2DAZfZBTo4AAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/SKIC9LVEDMgMRFKPdZcDw.1034.IRZXSOY?mo=Gk0aFDIBBDoBfUIGCKrrosMGSAJQSWABogEwCIoIEh4KHAi6gQYgATDgAzjWBkABSgcKAjYbEPQDUJQKaAIiCxIAKgdJUlpYU09ZkAOUCg%3D%3D&uc=73


 92%|█████████▏| 1923/2096 [02:07<00:16, 10.27it/s]

Error processing https://cf-st.sc-cdn.net/d/I0K82ymDtYdm5VGBZRUc3.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCLe-oMMGSAJQSWABogE3CIoIEiUKIwjk3DsgATDgAzjWBkABSg4KCVEtMDAsKzAvNBD0A1DyTWgCIgsSACoHSVJaWFNPWZAD8k0%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYdXpnb3BvYXZwAZfWuXQyAZfWuXF5AAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/I0K82ymDtYdm5VGBZRUc3.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCLe-oMMGSAJQSWABogE3CIoIEiUKIwjk3DsgATDgAzjWBkABSg4KCVEtMDAsKzAvNBD0A1DyTWgCIgsSACoHSVJaWFNPWZAD8k0%3D&uc=73


 92%|█████████▏| 1935/2096 [02:07<00:07, 20.43it/s]

Error processing https://cf-st.sc-cdn.net/i/nikbZXLMAM9MAiiF4Ciuu.1034.IRZXSOY?mo=GlQaFDIBCToBfUIGCMSxoMMGSAJQSWABogE3CIoIEiUKIwjAhiwgATDgAzjWBkABSg4KCV9JY1JKPjszQRD0A1DqJmgCIgsSACoHSVJaWFNPWZAD6iY%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYem9hYmJteG5wAZfWoDWDAZfWoDQKAAAAAw: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/i/nikbZXLMAM9MAiiF4Ciuu.1034.IRZXSOY?mo=GlQaFDIBCToBfUIGCMSxoMMGSAJQSWABogE3CIoIEiUKIwjAhiwgATDgAzjWBkABSg4KCV9JY1JKPjszQRD0A1DqJmgCIgsSACoHSVJaWFNPWZAD6iY%3D&uc=73


 93%|█████████▎| 1940/2096 [02:08<00:08, 18.83it/s]

Error processing https://cf-st.sc-cdn.net/d/Znzu32GrWxWnRcwIqmKjR.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCLPsn8MGSAJQSWABogE3CIoIEiUKIwiXkzQgATDgAzjWBkABSg4KCUg7NjApQTA4MxD0A1D2O2gCIgsSACoHSVJaWFNPWZAD9js%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYYXl6eWppZmpuAZfWGTePAZfV-4rgAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/Znzu32GrWxWnRcwIqmKjR.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCLPsn8MGSAJQSWABogE3CIoIEiUKIwiXkzQgATDgAzjWBkABSg4KCUg7NjApQTA4MxD0A1D2O2gCIgsSACoHSVJaWFNPWZAD9js%3D&uc=73


 98%|█████████▊| 2064/2096 [02:15<00:01, 18.39it/s]

Error processing https://cf-st.sc-cdn.net/d/OOVDJYPVETqMbf9R2C1X6.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCJmtlMMGSAJQSWABogE3CIoIEiUKIwi23zcgATDgAzjUBkABSg4KCW0XUyAlJhwxRBD0A1DyTWgCIgsSACoHSVJaWFNPWZAD8k0%3D&uc=73 for story_id W7_EDlXWTBiXAEEniNoMPwAAYd2x1cmJmYW9vAZfK37m_AZfK3QseAAAAAQ: 404 Client Error: Not Found for url: https://cf-st.sc-cdn.net/d/OOVDJYPVETqMbf9R2C1X6.1034.IRZXSOY?mo=GlQaFDIBBDoBfUIGCJmtlMMGSAJQSWABogE3CIoIEiUKIwi23zcgATDgAzjUBkABSg4KCW0XUyAlJhwxRBD0A1DyTWgCIgsSACoHSVJaWFNPWZAD8k0%3D&uc=73


100%|██████████| 2096/2096 [02:16<00:00, 15.32it/s]


#### Call Shibainu Classification

In [6]:
# Step 1: Initialize the Shibainu video classifier (Gemini provider, video input).
video_classifier = Classification(
    provider_name="gemini",
    model_name="gemini-2.0-flash",
    input_type="video",
    # this config is used for gemini only
    provider_config={
        "project_id": "myaigcp",
        "location": "us-central1",
    },
    processor_config={
        "processing_mode": "image_url",  # "image_url" or "bytes"
        "return_direct_url": True,
    },
    model_parameters={
        "temperature": 0,
        "max_token": 1024,
    },
    prompt=VIDEO_CLASSIFIER_PROMPT,
)

# Smoke test: send a trivial message and display the decoded reply, confirming
# the provider responds before the batch classification is launched.
video_classifier.get_result(video_classifier.send_message("What is your model?"))

'I am a large language model, trained by Google.'

In [7]:
# Step 2: Choose the top SAMPLE_PER_PLACE videos per place and run classification.
# Rows are grouped by `place_id` and ordered by `score`; `gcs_url` is passed as
# the filter column (exact filter semantics live in utils.helper).
df_selected_raw = topk_by_score_per_place(
    df,
    group_col="place_id",
    order_col="score",
    filter_col="gcs_url",
    k=SAMPLE_PER_PLACE,
)

# Classify every selected GCS URL concurrently.
# NOTE(review): the recorded run logged 429 RESOURCE_EXHAUSTED and closed-client
# errors at this concurrency; how failed items appear in the results is not
# visible here -- verify before trusting downstream parsing.
video_classifier_results = thread_map(
    video_classifier.classify,
    df_selected_raw["gcs_url"].tolist(),
    max_workers=MAX_WORKERS,
)

 82%|████████▏ | 40/49 [00:30<00:05,  1.73it/s]

2025-11-06 15:11:40,968 - shibainu - ERROR - Error during classification after 21.37 seconds: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'Resource exhausted. Please try again later. Please refer to https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429 for more details.', 'status': 'RESOURCE_EXHAUSTED'}}
2025-11-06 15:11:41,018 - shibainu - ERROR - Error during classification after 20.68 seconds: Server disconnected without sending a response.
2025-11-06 15:11:41,804 - shibainu - ERROR - Error during classification after 22.20 seconds: Cannot send a request, as the client has been closed.


100%|██████████| 49/49 [00:53<00:00,  1.09s/it]


In [8]:
# Step 3: Parse labels and construct the summary table.
# Decode each classifier response into its label text and its token usage.
video_labels = list(map(video_classifier.get_result, video_classifier_results))
video_usage = list(map(video_classifier.get_token_usage, video_classifier_results))

# Start from the selected rows (assign returns a new frame, so df_selected_raw
# is left untouched) and append the raw label plus token-usage columns.
df_video_summary = df_selected_raw.assign(
    video_labels=video_labels,
    video_prompt_tokens=[usage.get("prompt_tokens") for usage in video_usage],
    video_completion_tokens=[usage.get("completion_tokens") for usage in video_usage],
)

# 3.1 Parse labels: run parse_incident_safe on every raw label against
#     VIDEO_REQUIRED_KEYS, then expand each parsed result into columns via pd.Series.
video_parsed = df_video_summary["video_labels"].map(
    lambda raw_label: parse_incident_safe(raw_label, VIDEO_REQUIRED_KEYS)
).apply(pd.Series)

# 3.2 Attach parsed columns next to the original ones (aligned on the row index).
df_video_summary = pd.concat([df_video_summary, video_parsed], axis=1)

#### Consolidate Video Labels per Place with Text Classification

In [9]:
# Collapse the per-video rows into one row per place_id, using the per-column
# aggregations defined in place_agg_dict.
df_place_raw = (
    df_video_summary
    .groupby("place_id", as_index=False)
    .agg(place_agg_dict)
)

In [10]:
# Text classification: run the text prompt over each aggregated place record.
text_classifier = Classification(
    provider_name="gemini",
    model_name='gemini-2.0-flash',
    input_type="text",
    provider_config={"project_id": "myaigcp",
                    "location": "us-central1"}, #this config is used for gemini only
    # No processor_config here; it is only set on the video classifier.
    model_parameters={
        "temperature": 0,
        "max_token": 2000,
    },
    prompt=TEXT_CLASSIFIER_PROMPT
)

# One classify call per place record. Concurrency follows the shared MAX_WORKERS
# setting (as the video step does) rather than a separate hard-coded literal.
text_results = thread_map(text_classifier.classify, df_place_raw.to_dict("records"), max_workers=MAX_WORKERS)

100%|██████████| 11/11 [00:04<00:00,  2.21it/s]


In [None]:
# Construct the results: decode every text-classifier response into its label text.
text_labels = list(map(text_classifier.get_result, text_results))

# Build the place-level table from the aggregated frame: drop the listed columns
# and attach the raw text label. drop/assign return new frames, so df_place_raw
# itself is not modified.
df_place_summary = df_place_raw.drop(
    columns=[
        "key_objects",
        "activity_type",
        "contributing_context",
        "short_description",
        "long_description",
        "keywords",
    ]
).assign(text_labels=text_labels)

# Parse labels: run parse_incident_safe on each raw label against
# PLACE_REQUIRED_KEYS and expand each parsed result into columns via pd.Series.
text_parsed = df_place_summary["text_labels"].map(
    lambda raw_label: parse_incident_safe(raw_label, PLACE_REQUIRED_KEYS)
).apply(pd.Series)

# Attach the parsed columns next to the place-level columns.
df_place_summary = pd.concat([df_place_summary, text_parsed], axis=1)

#### Consolidate the Output

In [12]:
# Prefix the columns listed in STORY_COLS_RENAME with "video_" so they can be
# told apart from the place-level columns after the merge.
df_video_summary = df_video_summary.rename(
    columns=dict(
        (col, "video_{}".format(col)) for col in STORY_COLS_RENAME
    )
)

# Left-join the per-video rows onto the place summary: every place row is kept
# and repeated once per matching video.
df_merged = pd.merge(
    df_place_summary,
    df_video_summary,
    how="left",
    on=[
        "place_id",
        "continent",
        "place_name",
        "place_country_code",
        "detection_start_time",
        "detection_end_time",
    ],
)

#### Write Output to BQ

In [13]:
# Persist the merged table to BigQuery; if_exists="replace" overwrites any
# table already present at DESTINATION.
# NOTE(review): DataFrame.to_gbq is deprecated since pandas 2.2 in favour of
# pandas_gbq.to_gbq -- plan a migration when the environment is upgraded.
df_merged.to_gbq(
    destination_table=DESTINATION,
    project_id=WRITE_PROJECT_ID,
    if_exists="replace",
)
print(f"`{DESTINATION}` was created successfully!")

100%|██████████| 1/1 [00:00<00:00, 702.68it/s]

`sc-bq-gcs-billingonly.temp_datascience.maps_viral_places_classification` was created successfully!





In [15]:
# Preview only the first rows instead of dumping the whole frame, so the saved
# notebook stays small and the output stays readable.
df_merged.head(10)

Unnamed: 0,place_id,continent,place_name,place_country_code,detection_start_time,detection_end_time,virality_potential,event_type,event_scale,event_duration,...,video_keywords,video_event_type,video_event_scale,video_event_duration,video_event_intensity,video_associated_mood,video_key_objects_entities,video_activity_type,video_contributing_context,video_virality_potential
0,4c41d4e0-a681-11e8-9074-7f02ac589aae,Europe,Marmande,FR,2025-07-05 06:30:00,2025-07-05 15:45:00,0.8,Social Gathering,Regional,Short-term,...,"Concert, Music, Dj, Crowd, Lights, Stage, Nigh...",Social Gathering,Regional,Short-term,High,Positive,"Crowd, Stage, Lights, Djs","Dancing, Performing, Listening","Night, Indoor",1.0
1,4c41d4e0-a681-11e8-9074-7f02ac589aae,Europe,Marmande,FR,2025-07-05 06:30:00,2025-07-05 15:45:00,0.8,Social Gathering,Regional,Short-term,...,"Concert, Live performance, Stage, Crowd, Music...",Social Gathering,Regional,Short-term,High,Positive,"Stage, Crowd, Lights","Performing, Listening, Cheering","Night, Indoor",1.0
2,4c41d4e0-a681-11e8-9074-7f02ac589aae,Europe,Marmande,FR,2025-07-05 06:30:00,2025-07-05 15:45:00,0.8,Social Gathering,Regional,Short-term,...,"Concert, Music, Crowd, Lights, Stage, Performa...",Social Gathering,Regional,Short-term,High,Positive,"Crowd, Stage, Lights","Cheering, Listening, Dancing","Night, Outdoor",1.0
3,4c41d4e0-a681-11e8-9074-7f02ac589aae,Europe,Marmande,FR,2025-07-05 06:30:00,2025-07-05 15:45:00,0.8,Social Gathering,Regional,Short-term,...,"Music festival, Crowd, Dj, Stage, Ctrl+b, Outd...",Social Gathering,Regional,Short-term,High,Positive,"Crowd, Stage, Dj","Dancing, Listening, Partying","Outdoor, Sunny, Daytime",1.0
4,4c41d4e0-a681-11e8-9074-7f02ac589aae,Europe,Marmande,FR,2025-07-05 06:30:00,2025-07-05 15:45:00,0.8,Social Gathering,Regional,Short-term,...,"Garorock festival, Crowd, Music festival, Coca...",Social Gathering,Regional,Short-term,Medium,Positive,"Crowd, Umbrellas","Relaxing, Walking, Socializing","Sunny day, Outdoor",0.0
5,51c10b20-a681-11e8-8153-d333dde2706a,Asia,Kerbela,IQ,2025-07-05 04:00:00,2025-07-05 15:45:00,0.0,Social Gathering,Local,Short-term,...,"Religious leader, Speech, Sermon, Turban, Micr...",Social Gathering,Local,Short-term,Medium,Negative,"Religious leader, Microphone, People","Speaking, Listening","Indoor, Religious setting",0.0
6,51c10b20-a681-11e8-8153-d333dde2706a,Asia,Kerbela,IQ,2025-07-05 04:00:00,2025-07-05 15:45:00,0.0,Social Gathering,Local,Short-term,...,"Cooking, Pots, Outdoors, Palm trees, Food prep...",Social Gathering,Local,Short-term,Medium,Neutral,"Pots, People, Food","Cooking, Preparing food","Daytime, Outdoor",0.0
7,51c10b20-a681-11e8-8153-d333dde2706a,Asia,Kerbela,IQ,2025-07-05 04:00:00,2025-07-05 15:45:00,0.0,Social Gathering,Local,Short-term,...,"Man, Sunglasses, Car, Music, Driving, Sunroof,...",Miscellaneous or Other,Local,Short-term,Low,Neutral,"Man, Car, Sunglasses","Listening, Driving","Daytime, Sunny",0.0
8,51c10b20-a681-11e8-8153-d333dde2706a,Asia,Kerbela,IQ,2025-07-05 04:00:00,2025-07-05 15:45:00,0.0,Social Gathering,Local,Short-term,...,"Men, Walking, Oil, Supplies, Group, Sidewalk",Human Activity (Non-Social),Local,Short-term,Low,Neutral,"Men, Oil bottle, Box, Bags","Walking, Carrying","Daytime, Sidewalk",0.0
9,51c10b20-a681-11e8-8153-d333dde2706a,Asia,Kerbela,IQ,2025-07-05 04:00:00,2025-07-05 15:45:00,0.0,Social Gathering,Local,Short-term,...,"Driving, Car, Music, Sunglasses, Polo shirt, S...",Transportation Related,Local,Short-term,Low,Neutral,"Car, Man, Sunglasses","Driving, Listening","Daytime, Sunny",0.0
