#**Task.1: Synthetic Data Generation**

In [None]:
!pip install openai

In [None]:
import openai
import time
import re
import json
import pandas as pd
import csv
import os
import sys
from google.colab import userdata

In [None]:
def get_environment():
    """Identify the notebook host from the modules that are already imported.

    Returns:
        'colab' when ``google.colab`` is in ``sys.modules``, 'jupyter' when
        ``notebook`` is, and 'unknown' when neither is present. The Colab
        check takes precedence.
    """
    loaded = sys.modules
    for module_name, label in (('google.colab', 'colab'), ('notebook', 'jupyter')):
        if module_name in loaded:
            return label
    return 'unknown'


env = get_environment()
print(f"Running in: {env}")

In [None]:
# Resolve the AI/ML API key for the current runtime.
# On Colab the key is read through google.colab's `userdata`; every other
# runtime (Jupyter, or one get_environment() reports as 'unknown') reads the
# AIM_API_KEY environment variable, so `aim_api_key` is always bound
# (it is None when the variable is not set).
if env == 'colab':
    aim_api_key = userdata.get('AIM_API_KEY')
else:
    aim_api_key = os.getenv('AIM_API_KEY')

In [None]:
# Start data_generated.csv afresh with only the 'Doctor,Patient' header row;
# the generation loop appends its rows to this file.
header_only = pd.DataFrame(columns=['Doctor', 'Patient'])
header_only.to_csv("data_generated.csv", index=False)

In [None]:
# System message: sets the assistant's role for every generation request.
system_content_1 = "You are an expert in Cognitive Behavioural Therapy (CBT)."
# User message: asks for six therapist/visitor turns, each emitted as a flat
# {"therapist": ..., "visitor": ...} object. The generation loop extracts those
# objects with a regex and reads exactly these two keys, so the key names in
# the example must stay in sync with the parser.
# NOTE(review): the prompt text is sent verbatim to the model; its wording
# (including typos) is left untouched here.
user_content = """Assume a setting where a Therapist in a conversation with a visitor.The visitor is mostly a patient but can be occassionally a close relative
to the patient like like patient's father or mother as well. Generate a conversation set between the therapist and visitor, with 6 consecutive dialogues in a dictionary format,
for example, each of the 6 dialogues should be in this format -- {"therapist":"Good morning,what can I do for you?", "visitor": "I am not feeling well."}.
The dialogue should be focussed around CBT and sound very natural. Ensure that each dialogue  has 'therapist' string and a corresponding 'visitor' string and is symantically and syntactically complete".
Also do not output metadata like "**Dialogue 1**". """

# OpenAI SDK client pointed at the aimlapi.com endpoint instead of the default base URL.
client = openai.OpenAI(base_url="https://api.aimlapi.com", api_key=aim_api_key)

# Matches each flat {...} object in a model reply; compiled once, outside the loop.
_DIALOGUE_PATTERN = re.compile(r'\{.*?\}', re.DOTALL)


def _parse_dialogues(reply):
    """Extract [therapist, visitor] pairs from every {...} object in *reply*.

    Args:
        reply: Raw text returned by the chat model.

    Returns:
        A list of ``[therapist, visitor]`` lists, in order of appearance.

    Raises:
        json.JSONDecodeError: If a matched object is not valid JSON.
        KeyError: If an object lacks the 'therapist' or 'visitor' key.
    """
    pairs = []
    for raw in _DIALOGUE_PATTERN.findall(reply):
        turn = json.loads(raw)
        pairs.append([turn["therapist"], turn["visitor"]])
    return pairs


# Request up to 1000 conversations and append their turns to data_generated.csv.
for _ in range(1000):
    try:
        chat_completion = client.chat.completions.create(
            model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            messages=[
                {"role": "system", "content": system_content_1},
                {"role": "user", "content": user_content},
            ],
            temperature=0.7,
            max_tokens=512,
        )
    except openai.RateLimitError:
        # Stop the whole run once the provider starts rate limiting.
        print("Rate Limit reached")
        break

    # `content` may be None; treat that as an empty reply.
    response = chat_completion.choices[0].message.content or ""
    print(response)

    try:
        responses = _parse_dialogues(response)
    except (KeyError, json.JSONDecodeError) as err:
        # A malformed reply is discarded as a whole (nothing is written for
        # it) and the run continues, instead of aborting on invalid JSON.
        print(f"Skipping malformed response: {err!r}")
        continue

    print(responses)
    # Explicit UTF-8 so non-ASCII model output round-trips through pd.read_csv.
    with open("data_generated.csv", mode='a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(responses)

In [None]:
# Print the first ten generated exchanges, one separator line after each.
df = pd.read_csv("data_generated.csv").head(10)
for idx, doctor_line, patient_line in zip(df.index, df['Doctor'], df['Patient']):
    print(f"Obseravtion: {idx}")
    print(doctor_line)
    print(patient_line)
    print('*' * 50)

#**Task.2: Fine Tuning with TinyLLAMA**

In [None]:
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

In [None]:
!pip install triton torch==2.3.1
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install wandb

In [None]:
import torch
from unsloth import FastLanguageModel

# Loader settings (max_seq_length is reused when the trainer is built).
load_in_4bit = True    # Load the weights in 4-bit form for memory efficiency
dtype = None           # None = auto-detect; Float16 for T4/V100, Bfloat16 for Ampere GPUs
max_seq_length = 2048  # Sequence length passed to the loader

# Fetch the 4-bit TinyLlama checkpoint together with its tokenizer.
# (Gated checkpoints would additionally need a `token="hf_..."` argument.)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-bnb-4bit",
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

In [None]:
# Wrap the base model with LoRA adapters on the listed module names.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head",
    ],
    r=16,                             # LoRA rank; affects the number of trainable parameters
    lora_alpha=16,
    lora_dropout=0,                   # adapter dropout disabled
    bias="none",
    use_rslora=False,                 # rank-stabilized LoRA switched off
    loftq_config=None,                # no LoftQ configuration supplied
    use_gradient_checkpointing=True,  # reduces memory usage during training
)

In [None]:
# Load the generated conversation turns written by the data-generation step
# (the CSV is created with 'Doctor' and 'Patient' columns).
data = pd.read_csv("data_generated.csv")

In [None]:
data.shape

In [None]:
import json
# Instruction stored in every training record. It is a single line that is
# byte-identical to the instruction string used by the inference cells, so the
# model is prompted at inference exactly as it was trained. The previous
# triple-quoted form embedded a newline plus code indentation in the text.
# NOTE(review): the duplicated "to to" is kept on purpose to stay in sync with
# those inference prompts; fix it in all places together or not at all.
instruction_text = (
    "You are an Expert Therapist specialized in Cognitive Behavioural Therapy (CBT). "
    "Respond to to Patient's remarks in a polite, professional, contextually meaningful and useful manner."
)

# Reshape every CSV row into an instruction/input/output record:
# the visitor's line is the input and the therapist's reply is the target.
data_list = [
    {
        "instruction": instruction_text,
        "input": record["Patient"],
        "output": record["Doctor"],
    }
    for record in data.to_dict(orient="records")
]

with open('json_data.json', 'w') as f:
    json.dump(data_list, f, indent=4)

In [None]:
pd.read_json('json_data.json').head(3)

In [None]:
from datasets import load_dataset, DatasetDict
# Read json_data.json with the `datasets` JSON loader.
dataset = load_dataset("json", data_files="json_data.json")

In [None]:
# Re-wrap just the 'train' split in a fresh DatasetDict.
dataset= DatasetDict({'train': dataset['train']})
dataset  # cell output: display the resulting DatasetDict

In [None]:
# Prompt template with three positional `{}` slots, filled in order via
# str.format: instruction, input, response. For training the response slot
# holds the target text; for generation it is filled with an empty string.
prompt = """Below is an instruction that describes a task, paired with an input. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence marker taken from the tokenizer; appended to every training text.
EOS_TOKEN = tokenizer.eos_token

# Batched formatter applied through dataset.map
def formatting_prompts_func(examples):
    """Render each (instruction, input, output) triple into one training string.

    Args:
        examples: Batch mapping with parallel "instruction", "input" and
            "output" sequences.

    Returns:
        A dict with a single "text" key holding one rendered prompt per row,
        each terminated with EOS_TOKEN.
    """
    triples = zip(examples["instruction"], examples["input"], examples["output"])
    # EOS_TOKEN must terminate every sample, otherwise generation will go on forever.
    rendered = [prompt.format(*triple) + EOS_TOKEN for triple in triples]
    return {"text": rendered}

# Apply the formatting function to the dataset (adds the "text" column)
dataset = dataset.map(formatting_prompts_func, batched=True)

# Hold out 30% of the rows for evaluation. The split is seeded so that the
# train/eval partition is identical across runs; 3407 matches the seed used
# in the training arguments.
dataset_dict = dataset['train'].train_test_split(test_size=0.3, seed=3407)

train_dataset = dataset_dict["train"]
eval_dataset = dataset_dict["test"]

In [None]:
train_dataset.shape

In [None]:
eval_dataset.shape

In [None]:
!pip install wandb

In [None]:
def get_environment():
    """Report which notebook host this process appears to be running under.

    Returns:
        'colab' if ``google.colab`` has been imported, else 'jupyter' if
        ``notebook`` has been imported, else 'unknown'.
    """
    known_hosts = (('google.colab', 'colab'), ('notebook', 'jupyter'))
    return next(
        (label for module_name, label in known_hosts if module_name in sys.modules),
        'unknown',
    )


env = get_environment()
print(f"Running in: {env}")

In [None]:
# Resolve the Weights & Biases API key for the current runtime.
# On Colab it is read through google.colab's `userdata`; everywhere else
# (Jupyter or an 'unknown' runtime) it comes from the WANDB_API_KEY
# environment variable, the same name used for the Colab secret, so
# `wandb_api_key` is always bound (None when the variable is not set).
if env == 'colab':
    wandb_api_key = userdata.get('WANDB_API_KEY')
else:
    wandb_api_key = os.getenv('WANDB_API_KEY')

In [None]:
import wandb

# Log in to W&B with the API key resolved in the previous cell
wandb.login(key = wandb_api_key)

# Set W&B environment variables through the IPython %env magic
# NOTE(review): exact effect of WANDB_WATCH / WANDB_SILENT is defined by the
# wandb library — confirm against its documentation.
%env WANDB_WATCH=all
%env WANDB_SILENT=true

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from transformers.utils import logging
import wandb

# Raise transformers' log level to INFO.
logging.set_verbosity_info()

# Open the W&B run that the trainer reports to.
project_name = "tiny-llama"
entity = "wandb"
wandb.init(name="unsloth-tiny-llama", project=project_name)

# Decide the mixed-precision mode once: BF16 when supported, FP16 otherwise.
use_bf16 = is_bfloat16_supported()

# Hyper-parameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="outputs",                    # Directory to save model outputs
    report_to="wandb",                       # Enable logging to W&B
    seed=3407,                               # Random seed for reproducibility
    # --- schedule ---
    num_train_epochs=1,                      # Number of epochs
    max_steps=20,                            # Cap training at 20 steps for quick experimentation, increase or comment out as you see fit
    warmup_ratio=0.1,                        # Warm-up learning rate over 10% of training
    lr_scheduler_type="linear",              # Use linear learning rate decay
    # --- batching ---
    per_device_train_batch_size=2,           # Small batch size due to limited GPU memory
    gradient_accumulation_steps=4,           # Accumulate gradients over 4 steps
    # --- optimizer ---
    optim="adamw_8bit",                      # Use 8-bit AdamW optimizer to save memory
    learning_rate=2e-4,                      # Learning rate for the optimizer
    weight_decay=0.1,                        # Regularization to avoid overfitting
    # --- precision ---
    bf16=use_bf16,
    fp16=not use_bf16,
    # --- evaluation / logging ---
    evaluation_strategy="steps",             # Evaluate after a certain number of steps
    logging_steps=1,                         # Log metrics every step
)

In [None]:
# Supervised fine-tuning trainer over the formatted "text" column.
trainer = SFTTrainer(
    args=training_args,                      # Training arguments defined earlier
    model=model,
    tokenizer=tokenizer,
    # --- data ---
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",               # Column holding the rendered prompts
    dataset_num_proc=2,                      # Number of processes for dataset loading
    # --- sequence handling ---
    max_seq_length=max_seq_length,           # Max sequence length for inputs
    packing=True,                            # Packs short sequences together to save time
)

In [None]:
# Start training the model. The W&B run is closed on every path: with a
# non-zero exit code when training raises (the exception is re-raised), and
# normally otherwise, so a failed cell never leaves a run open.
try:
    trainer.train()
except BaseException:
    wandb.finish(exit_code=1)
    raise
else:
    # Finish and close the W&B session
    wandb.finish()

In [None]:
model

In [None]:
FastLanguageModel.for_inference(model)  # Enable faster inference

# Fill the prompt template; the response slot is left empty for the model to complete.
query = prompt.format(
    "You are an Expert Therapist specialized in Cognitive Behavioural Therapy (CBT). Respond to to Patient's remarks in a polite, professional, contextually meaningful and useful manner.",  # instruction
    "I am feeling tired and anxious",  # input
    "",  # output - blank for generation
)
inputs = tokenizer([query], return_tensors="pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)

# Keep only the text between "### Response:" and the next "###" marker, then
# drop anything of the form <...> (presumably special tokens left by decode).
chars_to_remove = re.compile('<.*?>')
decoded = tokenizer.decode(outputs[0])
outputs = decoded.split('### Response:', 1)[1].split('###', 1)[0].strip()
response = re.sub(chars_to_remove, '', outputs)

print(f"Therapist: {response}")

In [None]:
def get_environment():
    """Classify the current runtime by looking at the imported-module table.

    Returns:
        'colab' when ``google.colab`` is loaded (checked first), 'jupyter'
        when ``notebook`` is loaded, 'unknown' otherwise.
    """
    if 'google.colab' in sys.modules:
        return 'colab'
    return 'jupyter' if 'notebook' in sys.modules else 'unknown'


env = get_environment()
print(f"Running in: {env}")

In [None]:
# Resolve the Hugging Face token for the current runtime.
# On Colab it is read through google.colab's `userdata`; every other runtime
# (Jupyter or 'unknown') reads the HF_API_KEY environment variable, so
# `hf_api_key` is always bound (None when the variable is not set).
if env == 'colab':
    hf_api_key = userdata.get('HF_API_KEY')
else:
    hf_api_key = os.getenv('HF_API_KEY')

In [None]:
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub using the resolved token.
login(token=hf_api_key)

In [None]:
# Upload the trained model and its tokenizer to the Hub repo "TinyLLAMA_VS_test".
# NOTE(review): the repo id has no namespace — presumably it resolves to the
# logged-in account; a later cell loads "vsrinivas/TinyLLAMA_VS_test".
model.push_to_hub("TinyLLAMA_VS_test")
tokenizer.push_to_hub("TinyLLAMA_VS_test")

In [None]:
from transformers import AutoTokenizer
from unsloth import FastLanguageModel
import torch

# FastLanguageModel.from_pretrained is unpacked as (model, tokenizer) earlier
# in this notebook, so `hf_model` holds that whole pair; later cells use
# hf_model[0] as the model.
hf_model = FastLanguageModel.from_pretrained("vsrinivas/TinyLLAMA_VS_test")
# The tokenizer actually used for inference is loaded separately via AutoTokenizer.
hf_tokenizer = AutoTokenizer.from_pretrained("vsrinivas/TinyLLAMA_VS_test")

In [None]:
# Prompt template for inference against the Hub-hosted model. Three positional
# `{}` slots are filled via str.format in order: instruction, input, response
# (the response slot is passed an empty string when generating).
prompt = """Below is an instruction that describes a task, paired with an input. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [None]:
hf_model

In [None]:
FastLanguageModel.for_inference(hf_model[0])  # Enable faster inference

# Fill the prompt template; the response slot is left empty for the model to complete.
query = prompt.format(
    "You are an Expert Therapist specialized in Cognitive Behavioural Therapy (CBT). Respond to to Patient's remarks in a polite, professional, contextually meaningful and useful manner.",  # instruction
    "I am feeling tired and anxious",  # input
    "",  # output - blank for generation
)
inputs = hf_tokenizer([query], return_tensors="pt").to("cuda")

outputs = hf_model[0].generate(**inputs, max_new_tokens=64, use_cache=True)

# Keep only the text between "### Response:" and the next "###" marker, then
# drop anything of the form <...> (presumably special tokens left by decode).
chars_to_remove = re.compile('<.*?>')
decoded = hf_tokenizer.decode(outputs[0])
outputs = decoded.split('### Response:', 1)[1].split('###', 1)[0].strip()
response = re.sub(chars_to_remove, '', outputs)

print(f"Therapist: {response}")

#**Task.3: Cloud Prototype Development**


In [None]:
!pip install pyngrok

In [None]:
def get_environment():
    """Tell Colab, Jupyter and other runtimes apart via ``sys.modules``.

    Returns:
        'colab' if ``google.colab`` is loaded, otherwise 'jupyter' if
        ``notebook`` is loaded, otherwise 'unknown'.
    """
    detected = 'unknown'
    # Checked from lowest to highest priority so the Colab result wins.
    if 'notebook' in sys.modules:
        detected = 'jupyter'
    if 'google.colab' in sys.modules:
        detected = 'colab'
    return detected


env = get_environment()
print(f"Running in: {env}")

In [None]:
# Resolve the ngrok auth token for the current runtime.
# On Colab it is read through google.colab's `userdata`; every other runtime
# (Jupyter or 'unknown') reads the NGROK_API_KEY environment variable, so
# `ngrok_api_key` is always bound (None when the variable is not set).
if env == 'colab':
    ngrok_api_key = userdata.get('NGROK_API_KEY')
else:
    ngrok_api_key = os.getenv('NGROK_API_KEY')

In [None]:
from pyngrok import ngrok

# Register the auth token through pyngrok's API rather than an
# `!ngrok authtoken ...` shell line: IPython only interpolates Python variables
# written as {name} or $name, so a bare variable name on a `!` line is sent to
# the shell as literal text instead of the token value.
ngrok.set_auth_token(ngrok_api_key)

In [None]:
from flask import Flask, request, jsonify
# from flask_ngrok import run_with_ngrok
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [None]:
# Fixed sample visitor messages; the Flask endpoint below generates one
# therapist reply per entry, in this order.
usr_msgs= ["My daughter has been feeling really anxious about school lately, and I'm worried it's affecting her grades.",
"Well, she's been complaining about having butterflies in her stomach before every test, and she's been having a hard time sleeping at night because she's worried about not doing well.",
"Actually, yes. She's been really hard on herself when she makes a mistake on a test, and I think that's when the anxiety really takes over. ",
]

In [None]:
len(usr_msgs)

In [None]:
# Flask application, exposed through an ngrok tunnel on port 5000.
app = Flask(__name__)
public_url = ngrok.connect(5000)
print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:5000/\"")


@app.route('/')
def predict():
    """Generate a therapist reply for every message in ``usr_msgs``.

    Returns:
        A JSON response containing a list of
        ``{"Patient": <message>, "Therapist": <reply>}`` objects, one per
        message and in the same order.
    """
    instruction = "You are an Expert Therapist specialized in Cognitive Behavioural Therapy (CBT). Respond to to Patient's remarks in a polite, professional, contextually meaningful and useful manner."
    tag_pattern = re.compile('<.*?>')
    generator = hf_model[0]  # hf_model is a (model, tokenizer) pair

    output_list = []
    for um in usr_msgs:
        # The response slot stays empty so the model completes it.
        query = prompt.format(instruction, um, "")
        encoded = hf_tokenizer([query], return_tensors="pt").to("cuda")
        generated = generator.generate(**encoded, max_new_tokens=64, use_cache=True)
        decoded = hf_tokenizer.decode(generated[0])
        # Keep the text between "### Response:" and the next "###" marker.
        reply = decoded.split('### Response:', 1)[1].split('###', 1)[0].strip()
        output_list.append({"Patient": um, "Therapist": tag_pattern.sub('', reply)})
    print(output_list)
    return jsonify(output_list)


if __name__ == '__main__':
    app.run()