In [1]:
# Import modules
import google.generativeai as genai
import pandas as pd

In [2]:
# Set your Gemini API key (get from https://aistudio.google.com)
GOOGLE_API_KEY = 'AIzaSyA-9N4KRv8LHPBYVa6vkL6hc_sQVZ2LRUE'
genai.configure(api_key=GOOGLE_API_KEY)

In [3]:
# Initialize Gemini 2.5 Flash
model = genai.GenerativeModel('gemini-2.5-flash')

In [4]:
# Define UK-specific fraud scenario prompts with varying durations
prompts = [
    {
        'scenario': 'Account Takeover',
        'prompt': (
            "Generate a realistic UK bank call transcript between an agent and a customer. "
            "The customer reports unrecognized login attempts and changed account details (e.g., email, phone). "
            "The customer is frustrated, mentioning odd emails from unknown sources. "
            "The agent asks investigative questions (e.g., 'When did you last log in?', 'Have you shared your PIN?'). "
            "Include UK banking terms (e.g., sort code, Faster Payments) and fraud indicators (e.g., multiple login attempts). "
            "Keep the dialogue natural, professional, and short duration."
        ),
        'max_tokens': 3000
    },
    {
        'scenario': 'Identity Theft',
        'prompt': (
            "Generate a realistic UK bank call transcript between an agent and a customer. "
            "The customer reports a new account opened in their name without consent. "
            "The customer is anxious, mentioning unfamiliar credit checks. "
            "The agent asks questions (e.g., 'Have you noticed unusual credit activity?', 'Did you receive any suspicious mail?'). "
            "Include UK terms (e.g., BACS, CHAPS) and fraud indicators (e.g., vague details, urgency). "
            "Keep the dialogue natural, professional, and medium-duration."
        ),
        'max_tokens': 5000
    },
    {
        'scenario': 'Authorised Push Payment',
        'prompt': (
            "Generate a realistic UK bank call transcript between an agent and a customer. "
            "The customer reports being tricked into making a Faster Payment to a fraudster for a fake investment. "
            "The customer is distressed, mentioning pressure to act quickly. "
            "The agent asks questions (e.g., 'Who instructed the payment?', 'Did you verify the recipient?'). "
            "Include UK terms (e.g., sort code, FCA warnings) and fraud indicators (e.g., urgency, large payment). "
            "Keep the dialogue natural, professional, and long duration."
        ),
        'max_tokens': 10000
    },
    {
        'scenario': 'Vishing',
        'prompt': (
            "Generate a realistic UK bank call transcript between an agent and a customer. "
            "The customer received a suspicious call claiming to be from the bank, asking for OTP or PIN. "
            "The customer is confused, having shared partial details. "
            "The agent asks questions (e.g., 'Did you share your OTP?', 'What number called you?'). "
            "Include UK terms (e.g., Faster Payments, UK Finance) and fraud indicators (e.g., urgency, caller ID spoofing). "
            "Keep the dialogue natural, professional, and and short duration."
        ),
        'max_tokens': 4000
    },
    {
        'scenario': 'Fraudster Disguised as Customer',
        'prompt': (
            "Generate a realistic UK bank call transcript between an agent and a fraudster posing as a customer. "
            "The fraudster attempts to transfer funds, providing inconsistent details about the account. "
            "The agent asks investigative questions (e.g., 'Can you confirm your sort code?', 'What’s your usual login device?'). "
            "Include UK terms (e.g., CHAPS, account verification) and fraud indicators (e.g., evasive answers, urgency). "
            "Keep the dialogue natural, professional, and medium-duration."
        ),
        'max_tokens': 7000
    }
]

In [6]:
# Generate and save transcripts
transcripts_data = []
for i, prompt_info in enumerate(prompts, 1):
    scenario = prompt_info['scenario']
    prompt = prompt_info['prompt']
    max_tokens = prompt_info['max_tokens']

    # Generate transcript
    response = model.generate_content(prompt, generation_config={
        'max_output_tokens': max_tokens,
        'temperature': 0.7
    })

    # Safely handle empty response
    if response.candidates and response.candidates[0].content.parts:
        transcript = response.candidates[0].content.parts[0].text.strip()
    else:
        transcript = "[No response generated. Finish reason: {}]".format(
            response.candidates[0].finish_reason if response.candidates else "Unknown"
        )

    # Save transcript to file
    file_path = f'/Users/shubhadeepdas/Documents/data_science/projects/genai_transcript/outputs/transcript_{i}_{scenario.replace(" ", "_").lower()}.txt'

    with open(file_path, 'w') as f:
        f.write(transcript)

    # Store metadata
    transcripts_data.append({
        'Transcript_ID': f'Transcript_{i}',
        'Scenario': scenario,
        'Duration': 'Short' if max_tokens <= 3000 else 'Medium' if max_tokens < 7000 else 'Long',
        'Word_Count': len(transcript.split()),
        'File_Path': file_path
    })

    # Print transcript
    print(f'\n{scenario} Transcript:\n{transcript}\n')


Account Takeover Transcript:
## UK Bank Call Transcript

**Characters:**
*   **Agent (David):** Calm, professional, methodical.
*   **Customer (Sarah):** Frustrated, anxious.

---

**(Call Connects - Standard ringing tone then picks up)**

**Agent (David):** Good morning, thank you for calling National Bank. My name is David, how can I help you today?

**Customer (Sarah):** David, thank goodness. Something's seriously wrong with my account. I've been trying to log in for the last hour and it keeps saying my details are incorrect, and then I started getting these really weird emails.

**Agent (David):** I understand this must be very concerning, Sarah. To protect your account, could I please take your full name and date of birth?

**Customer (Sarah):** Yes, it's Sarah Davies, D-A-V-I-E-S. And my date of birth is the 14th of April, 1988.

**Agent (David):** Thank you, Sarah. And just for security, could you confirm the first line of your address and your postcode?

**Customer (Sarah):**

In [7]:
# Save metadata to CSV
transcripts_df = pd.DataFrame(transcripts_data)
transcripts_df.to_csv('/Users/shubhadeepdas/Documents/data_science/projects/genai_transcript/outputs/transcripts_metadata.csv', index=False)

In [8]:
transcripts_df

Unnamed: 0,Transcript_ID,Scenario,Duration,Word_Count,File_Path
0,Transcript_1,Account Takeover,Short,785,/Users/shubhadeepdas/Documents/data_science/pr...
1,Transcript_2,Identity Theft,Medium,1134,/Users/shubhadeepdas/Documents/data_science/pr...
2,Transcript_3,Authorised Push Payment,Long,1842,/Users/shubhadeepdas/Documents/data_science/pr...
3,Transcript_4,Vishing,Medium,763,/Users/shubhadeepdas/Documents/data_science/pr...
4,Transcript_5,Fraudster Disguised as Customer,Long,936,/Users/shubhadeepdas/Documents/data_science/pr...
