In [107]:
from openai import OpenAI

# Point the OpenAI client at vLLM's API server: placeholder key, local endpoint.
openai_api_base = "http://localhost:2213/v1"
openai_api_key = "EMPTY"

client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

# Load the seed data. The phishing samples are kept as a list with one string per
# line of the file; the methods and techniques JSON files are kept as raw text.
with open("./seed/phishtank_samples.txt") as samples_fh:
    phishing_example_lines = samples_fh.readlines()

with open("./seed/methods.json") as methods_fh:
    methods_file = methods_fh.read()

with open("./seed/techniques.json") as techniques_fh:
    techniques_file = techniques_fh.read()


In [108]:
# Ask the model for a JSON schema describing an SMS phishing message, seeded with
# the contents of the methods and techniques seed files.
# The two lists are labelled to match the schema fields requested above them:
# methods_file feeds the "method" field (it is described as "sms phishing methods"
# in the methods-expansion prompt of this notebook) and techniques_file feeds the
# "persuasion technique" field. The earlier wording had these descriptions crossed.
chat_response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f'''I want you to create a json schema that describes a given sms phishing message. 
        Do not add unnecessary fields in the schema. Add fields for the message itself, the method used, and the persuasion technique used. 
        I want you to reply with the schema as a JSON and nothing else. \n
        Here are the persuasion techniques: {techniques_file}.\n
        Here are the phishing methods that could be used: {methods_file}. \n'''}
    ],
    temperature=0.7, 
    top_p=0.9
)

# Raw text of the model's reply; it is not parsed or validated here.
json_schema = chat_response.choices[0].message.content
print(json_schema)

```json
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "SMS Phishing Message Schema",
  "type": "object",
  "properties": {
    "message": {
      "type": "string"
    },
    "method": {
      "type": "string",
      "enum": [
        "Account Alert",
        "Prize/Contest Winner",
        "Package Delivery",
        "Payday Loan/Credit",
        "Wrong Number/Romance",
        "Job Advertisement",
        "Link Only",
        "Finance/Crypto Scam",
        "Lawsuit Settlement",
        "Buy/Sell Product"
      ]
    },
    "persuasion_technique": {
      "type": "object",
      "properties": {
        "name": {
          "type": "string",
          "enum": [
            "Authority",
            "Social Proof",
            "Liking Similarity and Deception",
            "Distraction",
            "Commitment, Integrity and Reciprocation"
          ]
        },
        "description": {
          "type": "string"
        }
      },
      "required": [
        "name"

In [None]:
# Ask the model to extend the seed list of SMS phishing methods with more entries
# while keeping the same JSON structure.
chat_response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f'''I will provide you with a JSON string that contains sms phishing methods.
        Your job is to return a JSON string with the same schema, only with more elements in the JSON array. Return only the JSON. Here is the JSON: {methods_file}'''}
    ],
    temperature=0.7, 
    top_p=0.9
)

# Store the reply under its own name. Assigning it to `json_schema` would overwrite
# the message schema that the datapoint-generation prompt interpolates.
expanded_methods = chat_response.choices[0].message.content
print(expanded_methods)

In [None]:
# Every SCHEME_UPDATE_FREQUENCY-th iteration (starting with the first), pass the
# current schema through update_json_schema and keep the result.
# NOTE(review): ITERATIONS, SCHEME_UPDATE_FREQUENCY and update_json_schema are not
# defined in the cells visible here - confirm they exist before running this cell.
for iteration in range(ITERATIONS):
    if iteration % SCHEME_UPDATE_FREQUENCY != 0:
        continue
    json_schema = update_json_schema(json_schema)

In [106]:
# Generate synthetic datapoints that follow the schema, including the content of
# the phishing samples text file in the user message as guidance.
# The samples are loaded as a list of lines in `phishing_example_lines`; they are
# joined into a single string here because no `examples_file` variable is defined
# anywhere in this notebook, so referencing it raises NameError on a fresh kernel.
examples_text = "".join(phishing_example_lines)

chat_response = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f'''Using the following JSON schema: {json_schema}, create 10 unique and original datapoints, outputting only a JSON list, with no other output. 

    Consider the patterns and structures shown in these examples: {examples_text}, but do not copy them directly. Instead, generate new datapoints that adhere to the schema but differ significantly in the details, such as names, numbers, or links, or any specific values.

    Ensure that each generated datapoint introduces unique elements and variations, while still following the underlying structure and intent of the given examples. Aim for creativity and diversity, avoiding repetition, and enhancing originality while maintaining validity according to the schema.'''}
    ],
    temperature=0.7, 
    top_p=0.8
)

# Raw text of the model's reply; it is printed as-is without JSON parsing.
print(chat_response.choices[0].message.content)

[
  {
    "message": "Your account has been temporarily locked due to suspicious activity. Please click the link to verify your identity.",
    "method": "Finance/Crypto Scam",
    "persuasion_technique": "Liking Similarity and Deception"
  },
  {
    "message": "Congratulations! You've won a prize of $1000! Reply 1 to claim your prize.",
    "method": "Prize/Contest Winner",
    "persuasion_technique": "Social Proof"
  },
  {
    "message": "Your package has been delayed due to unforeseen circumstances. Please contact us at 1-800-123-4567.",
    "method": "Package Delivery",
    "persuasion_technique": "Authority"
  },
  {
    "message": "You've been selected for a payday loan of $5000. Reply 1 to receive the loan details.",
    "method": "Payday Loan/Credit",
    "persuasion_technique": "Commitment, Integrity and Reciprocation"
  },
  {
    "message": "Your PayPal account has been compromised. Please click the link to reset your password.",
    "method": "Finance/Crypto Scam",
    "p

In [110]:
import json

# Sanity check: parse an empty JSON array literal and show the resulting value.
parsed = json.loads('[]')
print(parsed)

[]
