In [2]:
# %pip install google-cloud-aiplatform==1.25.0
# %pip install google-api-core==1.33.1

In [1]:
import os
import vertexai
from vertexai.preview.language_models import TextGenerationModel
import pandas as pd
from PyPDF2 import PdfReader
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

import warnings
warnings.filterwarnings("ignore")

In [2]:
def pdf_parse(pdf_file):
    with open(pdf_file, 'rb') as pdf:
        reader = PdfReader(pdf, strict=False)
        pdf_text = []
        for page in reader.pages:
            content = page.extract_text()
            pdf_text.append(content)
        return " ".join(pdf_text)
    
def predict_large_language_model_sample(
    project_id: str,
    model_name: str,
    temperature: float,
    max_output_tokens: int,
    top_p: float,
    top_k: int,
    content: str,
    location: str = "us-central1",
    tuned_model_name: str = "",
    ) :
    
    vertexai.init(project=project_id, location=location)
    model = TextGenerationModel.from_pretrained(model_name)
    if tuned_model_name:
        model = model.get_tuned_model(tuned_model_name)
    response = model.predict(
        content,
        temperature=temperature,
        max_output_tokens=max_output_tokens,
        top_k=top_k,
        top_p=top_p,)
    return response.text

In [3]:
def convo_df(text):
    exchanges = [exchange.strip() for exchange in text.split('\n\n') if exchange.strip()]
    zen_responses = []
    client_responses = []
    for exchange in exchanges:
        if exchange.startswith("**Zen:**"):
            zen_responses.append(exchange.replace('**Zen:**', '').strip())
        elif exchange.startswith("**Client:**"):
            client_responses.append(exchange.replace('**Client:**', '').strip())

    max_length = max(len(zen_responses), len(client_responses))
    zen_responses.extend([''] * (max_length - len(zen_responses)))
    client_responses.extend([''] * (max_length - len(client_responses)))

    df = pd.DataFrame({'Client': client_responses,'Zen': zen_responses, })
    return df

In [154]:
path = 'Alexander Street/Batch 4/'
all_files = os.listdir(path)
pdf_files = [filename for filename in all_files if filename.lower().endswith('.pdf')]
source = [os.path.splitext(pdf_file)[0] for pdf_file in pdf_files]

In [102]:
prompt = """
            ### Prompt: Using the sample therapy transcript below, generate a synthetic therapy transcript\n
            between a mobile based AI therapist Zen and a client. Understand the client's struggles from the sample\n
            and then make Zen utilize state-of-the-art therapeutic techniques such as motivational interviewing.\n
            Zen should be empathetic and a great listener.\n
            The flow of the transcript needs to be mobile friendly and engaging.\n
            
            Use the format "**Client:**" and "**Zen:**"\n
            ----------------------------------------------------------------------------
            ### Transcript:
        """

In [155]:
df = pd.DataFrame()
for i in range(len(source)):
    src = source[i]
    conv_id = i+1
    transcript = pdf_parse(path+src+'.pdf')
    text = predict_large_language_model_sample(project_id = "cloud-lab-ff59", 
                                    model_name = "text-bison", 
                                    temperature = 0.4, 
                                    max_output_tokens = 1024,
                                    top_p = 0.8, 
                                    top_k = 40, 
                                    location = "us-central1",
                                    content = prompt + transcript)
    temp = convo_df(text)
    temp['conv_id'] = conv_id
    temp['source'] = src
    temp = temp[['conv_id','Client', 'Zen', 'source']]
    
    df = df.append(temp)

In [156]:
df.to_csv("df-batch-4.csv")

In [157]:
df_1 = pd.read_csv("df-batch-1.csv")
df_2 = pd.read_csv("df-batch-2.csv")
df_3 = pd.read_csv("df-batch-3.csv")
df_3 = pd.read_csv("df-batch-4.csv")
df = pd.concat([df_1, df_2, df_3])
source_to_number = {}
unique_sources = df['source'].unique()
for i, source in enumerate(unique_sources):
    source_to_number[source] = i + 1
df['conv_id'] = df['source'].map(source_to_number)

In [158]:
df.drop("Unnamed: 0", axis = 1, inplace = True)

In [159]:
df.shape

(483, 4)

In [160]:
len(df['source'].unique())

74

In [161]:
df.to_csv("PALM_Alexander_Street.csv")

In [162]:
df

Unnamed: 0,conv_id,Client,Zen,source
0,1,I’ve been having trouble sleeping lately. I just can’t seem to wind down at night.,That sounds frustrating. Can,"Client _A_, Session March 20, 2013_ Client has issues with performance and completing assignments for work when there are deadlines. Client discusses ... _ Alexander Street, part of Clarivate"
1,2,"I’ve been feeling very anxious and stressed lately. I’m not sure why, but I just can’t seem to relax.",I’m sorry to hear that you’re feeling anxious and stressed. It sounds like you’re going through a tough time.,"Client _A_, Session May 29, 2013_ Client discusses his current job situation and his paranoia over whether his contract will be extended. Client has ... _ Alexander Street, part of Clarivate"
2,2,"Yeah, it’s been really hard. I’ve been having trouble sleeping, and I can’t seem to focus on anything. I’m just feeling really overwhelmed.",It sounds like you’re feeling a lot of pressure. Can you tell me more about what’s going on?,"Client _A_, Session May 29, 2013_ Client discusses his current job situation and his paranoia over whether his contract will be extended. Client has ... _ Alexander Street, part of Clarivate"
3,2,"Well, I’m worried about my job. I’m not sure if I’m going to get a promotion, and I’m afraid that I might get fired. I’m also worried about my relationship. My partner and I have been fighting a lot lately, and I’m not sure if we’re going to make it.",It sounds like you have a lot on your plate. It’s no wonder you’re feeling stressed.,"Client _A_, Session May 29, 2013_ Client discusses his current job situation and his paranoia over whether his contract will be extended. Client has ... _ Alexander Street, part of Clarivate"
4,2,I know. I just don’t know what to do. I feel like I’m drowning.,You’re not drowning. You’re just feeling overwhelmed. Let’s take a step back and try to figure out what’s causing your anxiety.,"Client _A_, Session May 29, 2013_ Client discusses his current job situation and his paranoia over whether his contract will be extended. Client has ... _ Alexander Street, part of Clarivate"
5,2,I think it’s all the uncertainty. I don’t know what’s going to happen with my job or my relationship. I just want to know what the future holds.,I understand. It’s hard to deal with uncertainty. But it’s important to remember that you can’t control the future. All you can control is how you react to it.,"Client _A_, Session May 29, 2013_ Client discusses his current job situation and his paranoia over whether his contract will be extended. Client has ... _ Alexander Street, part of Clarivate"
6,2,"I know, but it’s so hard. I just want to feel safe and secure.",I know. But you can’t always feel safe and secure. That’s just part of life. The important thing is to learn,"Client _A_, Session May 29, 2013_ Client discusses his current job situation and his paranoia over whether his contract will be extended. Client has ... _ Alexander Street, part of Clarivate"
7,3,"I'm feeling really stressed out about finding a job. I've been applying to jobs for months, but I haven't had any luck. I'm starting to feel like I'm not good enough.","It sounds like you're feeling discouraged. It's important to remember that finding a job is a process. It takes time and effort. Just keep applying to jobs and networking with people. Eventually, you'll find something.","Client _A_, Session March 04, 2014_ Client discusses what it felt like when his wife was away and how stressful it was to take care of the baby by ... _ Alexander Street, part of Clarivate"
8,3,But what if I don't? What if I'm never good enough?,"You are good enough. You're a smart, capable person. You just need to keep trying. Don't give up.","Client _A_, Session March 04, 2014_ Client discusses what it felt like when his wife was away and how stressful it was to take care of the baby by ... _ Alexander Street, part of Clarivate"
9,3,It's hard to stay positive when I'm constantly being rejected.,"I know it's hard, but you have to stay positive. If you give up, you'll never find a job. Just keep trying and eventually, you'll find something.","Client _A_, Session March 04, 2014_ Client discusses what it felt like when his wife was away and how stressful it was to take care of the baby by ... _ Alexander Street, part of Clarivate"
