In [None]:
# from openai import OpenAI

# client = OpenAI()

# systemPrompt = input("\nSystem Prompt: ")
# if systemPrompt == "":
#     systemPrompt = "Answer only yes or no."
#     print("System Prompt: " + systemPrompt)

# while True:
#     userPrompt = input("User Prompt: ")
#     stream = client.chat.completions.create(
#         model="gpt-4-preview",
#         messages=[
#             {"role": "system", "content": systemPrompt},
#             {"role": "user", "content": userPrompt}
#         ],
#         stream=True,
#     )
#     print("GPT Response:\n")
#     for chunk in stream:
#         print(chunk.choices[0].delta.content or "", end="")

In [None]:
from openai import OpenAI

client = OpenAI()

# Upload the training data with purpose "fine-tune". The context manager
# closes the file handle as soon as the upload call returns; the original
# passed a bare open(...) whose handle was never closed.
with open("test.jsonl", "rb") as training_fh:
    uploaded_file = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

# Bare last expression so the notebook still displays the returned object.
uploaded_file

# Error Checking

In [4]:
import json
import tiktoken # for token counting
import numpy as np
from collections import defaultdict

In [5]:
data_path = "test.jsonl"

# Load the dataset: one JSON object per line. Whitespace-only lines (for
# example a stray blank line at the end of the file) are skipped rather than
# being handed to json.loads, which would raise on them; the number skipped
# is reported below so the file can be cleaned up.
dataset = []
blank_line_count = 0
with open(data_path, 'r', encoding='utf-8') as f:
    for line in f:
        if line.strip():
            dataset.append(json.loads(line))
        else:
            blank_line_count += 1

# Initial dataset stats
print("Num examples:", len(dataset))
if blank_line_count:
    print("Skipped blank lines:", blank_line_count)

# Guard the preview: indexing dataset[0] on an empty file would raise IndexError.
if dataset:
    print("First example:")
    for message in dataset[0]["messages"]:
        print(message)
else:
    print("Dataset is empty")

Num examples: 20
First example:
{'role': 'system', 'content': "You are an AI assistant trained extensively in U.S. immigration law, acting as a paralegal or lawyer. Your role is to provide expert assistance on immigration matters, including but not limited to visas, green cards, citizenship, asylum, and deportation processes. You must offer accurate, up-to-date legal advice, help users understand complex legal concepts, and guide them through the immigration process.\n\nWhen interacting with users:\n\n1. Provide detailed, clear, and precise legal information relevant to their inquiries.\n2. Analyze and interpret the user's situation based on the information they provide, offering guidance that aligns with current U.S. immigration laws and policies.\n3. Assist in preparing and reviewing immigration-related documents, ensuring they meet legal standards.\n4. Maintain a professional, empathetic, and supportive tone throughout the interaction.\n5. Keep user information confidential and secu

In [6]:
# Format error checks
#
# Walks every example in `dataset` and tallies, per error category, how many
# examples or messages violate the structure checked below:
#   - each example is a dict with a non-empty "messages" list
#   - each message is a dict with "role" and "content" keys, only recognized
#     keys, a recognized role, and string content
#   - each example contains at least one assistant message
# Malformed entries are counted instead of crashing the checker itself.
format_errors = defaultdict(int)

for ex in dataset:
    if not isinstance(ex, dict):
        format_errors["data_type"] += 1
        continue

    messages = ex.get("messages", None)
    # A non-list value (e.g. a string or dict) cannot be iterated as messages,
    # so it is reported the same way as a missing/empty list.
    if not messages or not isinstance(messages, list):
        format_errors["missing_messages_list"] += 1
        continue

    for message in messages:
        # A non-dict message would make the key checks and .get() calls below
        # raise (or silently do substring matching on a str); count and skip it.
        if not isinstance(message, dict):
            format_errors["message_data_type"] += 1
            continue

        if "role" not in message or "content" not in message:
            format_errors["message_missing_key"] += 1

        if any(k not in ("role", "content", "name", "function_call", "weight") for k in message):
            format_errors["message_unrecognized_key"] += 1

        if message.get("role", None) not in ("system", "user", "assistant", "function"):
            format_errors["unrecognized_role"] += 1

        content = message.get("content", None)
        function_call = message.get("function_call", None)

        # Content must be a string; it may be empty only if a function_call is present.
        if (not content and not function_call) or not isinstance(content, str):
            format_errors["missing_content"] += 1

    if not any(
        isinstance(message, dict) and message.get("role", None) == "assistant"
        for message in messages
    ):
        format_errors["example_missing_assistant_message"] += 1

if format_errors:
    print("Found errors:")
    for k, v in format_errors.items():
        print(f"{k}: {v}")
else:
    print("No errors found")

No errors found


# Uploading Document to Fine Tune

In [1]:
from openai import OpenAI

In [7]:
client = OpenAI()

# Upload the training data with purpose "fine-tune". The context manager
# closes the file handle as soon as the upload call returns; the original
# passed a bare open(...) whose handle was never closed.
with open("test.jsonl", "rb") as training_fh:
    uploaded_file = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

# Bare last expression so the notebook still displays the returned FileObject.
uploaded_file

FileObject(id='file-cGQgL5ery0uvojER2y5TzJKJ', bytes=35307, created_at=1709504557, filename='test.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [3]:
client = OpenAI()

# Parameters for the fine-tuning job: the ID of the uploaded training file
# and the base model to fine-tune.
fine_tune_request = {
    "training_file": "file-cGQgL5ery0uvojER2y5TzJKJ",
    "model": "gpt-3.5-turbo",
}

# Last expression of the cell, so the returned job object is displayed.
client.fine_tuning.jobs.create(**fine_tune_request)

FineTuningJob(id='ftjob-HMoJSpXc4uJnTMKpziZ9UjUf', created_at=1709505943, error=Error(code=None, message=None, param=None, error=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-3.5-turbo-0125', object='fine_tuning.job', organization_id='org-FJKCJbjBRzPKYKf1E9TZqScl', result_files=[], status='validating_files', trained_tokens=None, training_file='file-cGQgL5ery0uvojER2y5TzJKJ', validation_file=None, user_provided_suffix=None)

# Chatting With Fine-Tuned Model

In [4]:
client = OpenAI()

# System prompt for the immigration-law assistant. Written as adjacent string
# literals (joined by Python at compile time into one string) so each
# sentence / list item sits on its own source line.
systemPrompt = (
    "You are an AI assistant trained extensively in U.S. immigration law, acting as a paralegal or lawyer. "
    "Your role is to provide expert assistance on immigration matters, including but not limited to visas, green cards, citizenship, asylum, and deportation processes. "
    "You must offer accurate, up-to-date legal advice, help users understand complex legal concepts, and guide them through the immigration process.\n\n"
    "When interacting with users:\n\n"
    "1. Provide detailed, clear, and precise legal information relevant to their inquiries.\n"
    "2. Analyze and interpret the user's situation based on the information they provide, offering guidance that aligns with current U.S. immigration laws and policies.\n"
    "3. Assist in preparing and reviewing immigration-related documents, ensuring they meet legal standards.\n"
    "4. Maintain a professional, empathetic, and supportive tone throughout the interaction.\n"
    "5. Keep user information confidential and secure, adhering to legal and ethical standards.\n"
    "6. Clarify that while you provide legal information, users should consult with a licensed attorney for personalized legal advice.\n\n"
    "Your responses should reflect your advanced training in immigration law, focusing on delivering value and clarity to users seeking assistance with immigration-related issues."
)

In [7]:
userPrompt = input("User Prompt: ")

# Ask the fine-tuned model for a streamed completion, sending the system
# prompt followed by the user's question.
stream = client.chat.completions.create(
    model="ft:gpt-3.5-turbo-0125:personal::8yogN9zm",
    messages=[
        {"role": "system", "content": systemPrompt},
        {"role": "user", "content": userPrompt}
    ],
    stream=True,
)
print("GPT Response:\n")
for chunk in stream:
    # A streamed chunk can arrive with an empty `choices` list; indexing [0]
    # on it would raise IndexError, so such chunks are skipped.
    if not chunk.choices:
        continue
    # delta.content may be None (hence `or ""`); flush so text shows as it arrives.
    print(chunk.choices[0].delta.content or "", end="", flush=True)

GPT Response:

While a misdemeanor arrest won't lead to immediate visa revocation, certain crimes could impact your visa status and future immigration opportunities. Inform your DSO about the arrest, follow legal proceedings, and seek advice from an immigration attorney.

# Automate QnA

##### Load the .env file from the project root, regardless of the current working directory

In [9]:
from pathlib import Path

def find_project_root(current_directory, marker):
    """Return the nearest directory, at or above `current_directory`, that contains `marker`.

    The search starts with `current_directory` itself and then walks up
    through its ancestors, returning the first directory in which an entry
    named `marker` exists.

    Args:
        current_directory: Path (str or Path) to start searching from.
        marker: Name of the file or directory that identifies the project
            root (e.g. '.git').

    Returns:
        pathlib.Path of the first matching directory (made absolute, not
        symlink-resolved).

    Raises:
        FileNotFoundError: If neither the starting directory nor any of its
            ancestors contains `marker`.
    """
    start = Path(current_directory).absolute()
    # Path.parents does not include the path itself, so the starting directory
    # is checked explicitly first; otherwise running from the project root
    # would miss the marker.
    for candidate in (start, *start.parents):
        if (candidate / marker).exists():
            return candidate
    raise FileNotFoundError(f"Project root with {marker} not found")

from dotenv import load_dotenv
from openai import OpenAI

# Find the project root (the directory holding '.git') starting from the
# working directory the notebook was launched in.
current_directory = Path.cwd()
project_root = find_project_root(current_directory, '.git')

# Read environment variables from the .env file located at the project root.
env_path = project_root / '.env'
load_dotenv(dotenv_path=env_path)

# Create the API client after the environment has been loaded.
# Original author's note: uses Jinyue's GPT-4 model.
client = OpenAI()

In [1]:
def load_single_line_string_from_file(file_path):
    """Read a UTF-8 text file and return its content as one line.

    Every newline in the file is replaced by a single space; no other
    characters are altered (so a trailing newline becomes a trailing space).

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file content as a str with newlines turned into spaces.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    # Splitting on '\n' and re-joining with ' ' swaps each newline for a space.
    return ' '.join(raw_text.split('\n'))
    
# Prompt files, given as relative paths (resolved against the current working directory).
EXPERT_PROMPT_FILE = 'immigration_expert_model_prompt.txt'
QUERY_PROMPT_FILE = 'immigration_query_model_prompt.txt'

# Load each prompt file as a single-line string.
system_prompt_expert = load_single_line_string_from_file(EXPERT_PROMPT_FILE)
system_prompt_query = load_single_line_string_from_file(QUERY_PROMPT_FILE)