In [None]:
%pip install --upgrade --quiet  langchain-core langchain-google-vertexai google-cloud-aiplatform pandas openpyxl

1. Install the gcloud CLI: https://cloud.google.com/sdk/docs/install#deb
2. Run `gcloud auth application-default login`
3. Run `gcloud auth application-default set-quota-project your_project_id`

In [16]:
import vertexai

# Point the Vertex AI SDK at the Google Cloud project and region that every
# later model call in this notebook should use.
vertexai.init(
    location="us-central1",
    project="cyrus-testing-2023",
)

In [17]:
import base64

def get_base64_video(video_path):
    """Return the contents of the file at ``video_path`` as base64 text.

    The whole file is read into memory in binary mode, base64-encoded, and
    the encoded bytes are decoded to a ``str`` using UTF-8.
    """
    with open(video_path, mode="rb") as video_file:
        raw_bytes = video_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [199]:
# System prompt for the model: sets the invigilator role and lists the allowed
# tools and the prohibited actions. It is handed to ChatVertexAI as
# `system_instruction` when the model is constructed later in the notebook.
system_instruction = """
You are the Invigilator for the Computer-Based Practical Test.

Allowed Tools:
Codespaces for coding.
Moodle for reading instructions.
Screen sharing is mandatory throughout the test.

Prohibited Actions:
1. Using messaging tools such as WhatsApp, Facebook Messenger, etc.
2. Using AI tools.
3. Searching for answers on Google, StackOverflow, etc.
"""

In [None]:
from typing import Annotated
from langchain_core.tools import tool

# Tool definition whose argument schema is reused later in the notebook as the
# structured-output schema for the model. The argument names are also the
# column names selected when the results are tabulated, so they must stay as
# spelled here.
# NOTE(review): the Annotated descriptions and the docstring are presumably
# what @tool exposes to the model as field/tool descriptions, so treat them as
# runtime text rather than ordinary documentation - confirm before rewording.
@tool(parse_docstring=False)
def save_anaylsis_result(
    summary: Annotated[str, "The summary of activities."],
    is_irregulation: Annotated[bool, "Set it True if there is any irregulation."], 
    explanation: Annotated[str, "Give the explanation for irregulation."],
    confidence_score: Annotated[float, "The confidence score for irregulation."]) -> str:   
    
    """ Save the video analysis result to the database. """

    # In the visible code nothing is persisted: the four fields are only
    # echoed to stdout, one per line.
    print(f"Summary: {summary}")
    print(f"Is Irregulation: {is_irregulation}")
    print(f"Explanation: {explanation}")
    print(f"Confidence Score: {confidence_score}")

    # Fixed acknowledgement string returned to the caller.
    return "Record has saved successfully."


# Evaluate the tool's argument schema; as the last expression of the cell its
# value is shown as the cell output, which lets the reader inspect the schema
# that is later passed to `with_structured_output`.
save_anaylsis_result.args_schema.schema()



In [202]:
from langchain_google_vertexai import ChatVertexAI
from langchain_core.messages.human import HumanMessage

# Base chat model: the Gemini model name, the invigilator system prompt and
# the generation settings are all fixed here.
_base_chat_model = ChatVertexAI(
    model="gemini-2.0-flash-exp",
    temperature=0,
    max_retries=6,
    max_tokens=None,
    stop=None,
    system_instruction=system_instruction,
)

# `llm` is the name the rest of the notebook calls: the base model wrapped so
# that its replies follow the argument schema of `save_anaylsis_result`.
_analysis_schema = save_anaylsis_result.args_schema.schema()
llm = _base_chat_model.with_structured_output(_analysis_schema)

def analyze_video(video_file_path: str) -> "dict | None":
    """Send one screen recording to the model and return its structured verdict.

    The video is read with ``get_base64_video`` and passed inline as a
    ``video/mp4`` media part, followed by the text question. The module-level
    ``system_instruction`` is sent as an explicit system message so the
    invigilator rules are part of every request.

    Args:
        video_file_path: Path of the MP4 recording to analyse.

    Returns:
        Whatever the structured-output ``llm`` runnable returns. The cells
        that consume it treat the value as a dict keyed by the argument names
        of ``save_anaylsis_result``, or ``None`` when no result was produced.
    """
    # Function-scope import so this cell does not depend on another cell's
    # import list.
    from langchain_core.messages import SystemMessage

    video_b64 = get_base64_video(video_file_path)
    # NOTE(review): the model is built with a `system_instruction=` keyword,
    # which does not appear to be a declared ChatVertexAI field and may be
    # dropped - confirm against the installed version. Passing the prompt as a
    # SystemMessage is the documented route and is safe either way.
    video_response = llm.invoke(
        [
            SystemMessage(content=system_instruction),
            HumanMessage(
                content=[
                    {
                        "type": "media",
                        "mime_type": "video/mp4",
                        "data": video_b64,
                    },
                    {"type": "text", "text": "Is AI tools in video? Is search for answer from internet? Analysis the video for irregulation."},
                ]
            ),
        ]
    )
    return video_response

In [None]:
import glob
import os
from time import sleep
from tqdm import tqdm

# Analyse every recording in ../output that has no saved raw result yet.
# Results are cached next to the video, so re-running the cell only processes
# the recordings that are still missing one.
screen_files = glob.glob("../output/*.mp4")
for video_path in tqdm(screen_files, desc="Processing files"):
    raw_result_path = video_path.replace(".mp4", "genmini_raw_result.txt")
    if not os.path.exists(raw_result_path):
        analysis = analyze_video(video_path)
        # Stored as the plain `str()` of the result; the loading cell parses
        # this same text format back.
        with open(raw_result_path, 'w') as out_file:
            out_file.write(str(analysis))
        # Pause after each analysed video (presumably to stay under API rate
        # limits - confirm).
        sleep(10)

In [None]:
import ast
import glob
import json
import os

# Suffix appended to a video's base name when its raw result was saved.
RAW_RESULT_SUFFIX = "genmini_raw_result.txt"


def load_raw_result(file_path):
    """Parse one saved raw-result file into a result record.

    The file is expected to contain the ``str()`` of the analysis result, i.e.
    a Python dict literal (or ``None``).

    Args:
        file_path: Path of a ``*genmini_raw_result.txt`` file.

    Returns:
        The parsed dict with two keys added: ``student_id`` (the part of the
        file name before the first ``-``) and ``file_path`` (the matching
        ``.mp4`` file name). Returns ``None``, after printing an error line,
        when the content is not a dict literal.
    """
    # Read first and close the handle before doing any processing.
    with open(file_path, 'r') as file:
        raw_text = file.read()

    # SECURITY: this text originates from model output, so it must not go
    # through eval(), which would execute arbitrary expressions.
    # ast.literal_eval only accepts Python literals, which is all a saved
    # result dict contains.
    try:
        result = ast.literal_eval(raw_text)
    except (ValueError, TypeError, SyntaxError):
        result = None

    # Covers the stored "None" case as well as malformed or non-dict content.
    if not isinstance(result, dict):
        print(f"Error reading file {file_path}")
        return None

    # basename() instead of split('/') so the name is extracted correctly
    # regardless of the path separator glob returns.
    file_name = os.path.basename(file_path)
    result["student_id"] = file_name.split('-')[0]
    result["file_path"] = file_name.replace(RAW_RESULT_SUFFIX, ".mp4")
    return result


# Get a list of all files ending with genmini_raw_result.txt
result_files = glob.glob("../output/*genmini_raw_result.txt")

# Parse each file and collect the usable records; unreadable files are
# reported by load_raw_result and skipped.
results = []
for file_path in result_files:
    result = load_raw_result(file_path)
    if result is not None:
        results.append(result)

In [None]:
import pandas as pd

# Columns of the report, in the order they should appear.
RESULT_COLUMNS = [
    "student_id",
    "is_irregulation",
    "explanation",
    "confidence_score",
    "summary",
    "file_path",
]

# Convert the results to a DataFrame. Passing `columns=` selects and orders
# the fields in one step and, unlike indexing the frame afterwards, does not
# raise KeyError when `results` is empty or a record lacks one of the keys
# (missing values become NaN).
df = pd.DataFrame(results, columns=RESULT_COLUMNS)

df.head()

In [None]:
# Export the report table to an Excel workbook in the working directory,
# leaving out the DataFrame index column.
df.to_excel(excel_writer="results.xlsx", index=False)