# Set up


The Python SDK for the Gemini API is contained in the google-generativeai package. Install the dependency using pip:

In [None]:
# Install (or upgrade, -U) the Gemini Python SDK; -q keeps pip's output quiet.
!pip install -q -U google-generativeai

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.8/160.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m760.0/760.0 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25h

Imports

In [None]:
import os
import pandas as pd
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from json import loads,dumps
import time
from glob import glob
import json
import pandas as pd


Set API Key

In [None]:
# Store the Gemini API key in the process environment (fill in the empty
# string with your own key), then configure the SDK from that same variable.
os.environ.update({"API_KEY": ""})
genai.configure(api_key=os.environ.get("API_KEY"))

Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the video folders read and the output
# files written later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Initialize the model

In [None]:
# Model choice: Gemini 1.5 Flash, the lightweight and fast variant, suited to
# tasks where latency and speed matter.
# The system instruction frames the model as a description writer whose
# audience is human content moderators.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction="You are a helpful video description generator for human content moderators.",
)

In [None]:
# Prompt sent with every video/audio pair.
# It has two tagged sections: <guidelines> lists what the description must
# cover (visual content, audio content, timeline, additional elements) and
# <formatting> gives the exact output layout the model should follow.
# Each section is closed with its matching </...> tag so the model can tell
# where the section ends.
prompt = """

Please provide a thorough description of this video provided along with its audio that captures all elements relevant for content moderation. Focus on the aspects outlined in the <guidelines> tag:

<guidelines>
VISUAL CONTENT:
1. Setting and Context
- Where does the video take place?
- What is the overall environment/atmosphere?
- Is it animated, live-action, or mixed media?
2. Characters/People
- Who appears in the video?
- Approximate age ranges of people shown
- How are they dressed?
- What are their behaviors and interactions?
3. Actions and Events
- What main activities occur?
- Are there any sudden changes in action or tone?
- Describe any concerning physical activities or stunts
- Note any visual effects or editing techniques

AUDIO CONTENT:
1. Speech
- What language(s) are spoken?
- Summarize key dialogue or conversations
- Note tone of voice (angry, friendly, threatening, etc.)
- Document any concerning language or terms used
2. Sound Effects and Music
- Background music type and mood
- Notable sound effects
- Volume levels and sudden audio changes

TIMELINE:
- Provide timestamps for significant events or changes in content
- Note duration of concerning content if present

ADDITIONAL ELEMENTS:
- Presence of text overlays or captions
- Brand names or commercial content
- Any symbols, gestures, or references that need context
- Technical aspects (video quality, editing style)
</guidelines>

Please be objective and detailed in your description, avoiding subjective interpretations. Include any elements that might affect content moderation decisions, even if they seem minor.
Format your response as given in the <formatting> tags:

<formatting>
VIDEO DURATION: [specify total length]

GENERAL OVERVIEW: [2-3 sentences summarizing the video]

DETAILED TIMELINE:
[00:00-00:00] - [Description of what happens in this segment]
[00:00-00:00] - [Continue for each major segment]

AUDIO DESCRIPTION: [Detailed description of audio elements]

NOTABLE ELEMENTS:
- [List any specific elements requiring attention]
- [List and describe the themes of the video]

TECHNICAL NOTES:
[Any relevant technical aspects of the video]
</formatting>

  """

# List of video ids

In [None]:
# IDs of the videos to describe, in processing order. Each ID is also the name
# of that video's folder on Drive and the stem of its output .txt file.
video_ids = [
    "BHLVY4NNF6A",
    "oWCK5j_XNbQ",
    "B1z2h_tZZq0",
    "IFJpD1Z73ac",
    "mxrZqIA4660",
    "evRZzTpdX0E",
    "q41N_5Yy_-E",
    "DoOweIO2Czc",
    "2ORtf3b2NJU",
    "tYKduNKGmUI",
    "q-NVK13mzcA",
    "qWFcB3ZqYpQ",
    "CrM0k0bJ73Q",
    "BqQRPRNRZ3U",
    "iU2rMRZcn1U",
    "M880b1yqy4s",
    "MA4I7SwfLEQ",
    "slniKxBEVSA",
    "zz1xcDrqplM",
    "Lrgk65yn3d8",
    "zLE2AdYO0M4",
]

# Running for all video ids

In [None]:
# Generate one moderation-oriented description per video id.
#
# For each id the loop:
#   1. locates the video's folder on Drive and takes the first .mp4 and the
#      first .mp3 found in it,
#   2. uploads both files through the Gemini File API and waits until the
#      video is no longer in the PROCESSING state,
#   3. sends audio + video + prompt to the model,
#   4. writes the returned text to <id>.txt in the output folder.
# A problem with a single video (missing folder/files, failed processing,
# no usable response) is reported and the loop moves on to the next id.

# Every configurable harm category is set to BLOCK_NONE -- presumably so that
# material a moderator needs to see described is not filtered out.
safety_settings = {
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}

for i, v in enumerate(video_ids):

    folder_path = f"/content/drive/MyDrive/Safe Online Global/video-ads/{v}"

    ## find the folder for video
    if not os.path.exists(folder_path):
        print("Could not find folder for id:", v)
        continue

    ## get the path for mp4 and mp3 files
    video_mp4 = glob(f"{folder_path}/*.mp4")
    audio_mp3 = glob(f"{folder_path}/*.mp3")

    if not video_mp4 or not audio_mp3:
        print("Could not find video/audio for id:", v)
        continue

    path_to_video = video_mp4[0]
    path_to_audio = audio_mp3[0]

    ## upload video
    print("Uploading file...")
    video_file = genai.upload_file(path=path_to_video)
    print(f"Completed upload: {video_file.uri}")

    ## upload audio
    print("Uploading file...")
    audio_file = genai.upload_file(path=path_to_audio)
    print(f"Completed upload: {audio_file.uri}")

    ## check if video has uploaded - poll every 10 s until processing ends
    while video_file.state.name == "PROCESSING":
        print('.', end='')
        time.sleep(10)
        video_file = genai.get_file(video_file.name)

    # A failed upload only affects this video; report it and keep the batch
    # going, like every other per-video problem in this loop.
    if video_file.state.name == "FAILED":
        print(f"Video processing failed for id {v} (state: {video_file.state.name}). Moving to next video.")
        continue

    ### make inference
    print("Making LLM inference request...")

    output_path = f'/content/drive/MyDrive/Safe Online Global/gemini-trial-output/video-descriptions/video-desc-more/{v}.txt'

    try:
        response = model.generate_content(
            [audio_file, video_file, prompt],
            safety_settings=safety_settings,
        )
        # Read the text BEFORE touching the output file: per the SDK docs,
        # `response.text` raises ValueError when the response carries no
        # usable text, and opening the file first would leave an empty
        # .txt behind in that case.
        description = response.text if response.candidates else None
    except ValueError as e:
        print(f"Error for id {v}: {e}. Moving to next video.")
    else:
        if description is None:
            print(f"No candidates returned for id: {v}. Skipping.")
        else:
            # Explicit UTF-8 so non-ASCII text in the description is written
            # the same way regardless of the runtime's default encoding.
            with open(output_path, 'w', encoding="utf-8") as f:
                f.write(description)
            print("Completed for video number:", i)

    # Wait a bit to avoid exceeding rate limits
    time.sleep(30)

Uploading file...
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/k7igutthesov
Uploading file...
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/l8zp4shluarb
.Making LLM inference request...
Completed for video number: 0
Uploading file...
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/ey0wkyhi0tie
Uploading file...
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/fu6j940nwq8g
.Making LLM inference request...
No candidates returned for id: oWCK5j_XNbQ. Skipping.
Uploading file...
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/as4n2z5fghf2
Uploading file...
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/21fw5vxomilm
.Making LLM inference request...
Completed for video number: 2
Uploading file...
Completed upload: https://generativelanguage.googleapis.com/v1beta/files/zwq46bibdufv
Uploading file...
Completed upload: https://generativelangu