In [None]:
# Print the GPU attached to this runtime (model, driver/CUDA version, memory use).
!nvidia-smi

Wed Mar 30 19:26:42 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
%%capture
# %%capture (which must remain the first line of the cell) hides the installer output.
import sys
# Install only when running inside Google Colab; other environments are left untouched.
if 'google.colab' in sys.modules:
    # Installs transformers from the repository's default branch, with no version pin.
    # NOTE(review): an unpinned source install is not reproducible — consider pinning a release/commit.
    !pip install git+https://github.com/huggingface/transformers.git

In [None]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub; the model/tokenizer loading below
# passes `use_auth_token=True`, which relies on a stored token.
notebook_login()

In [None]:
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Checkpoint to load. Alternative kept for reference: "EleutherAI/gpt-neo-1.3B".
model_id = "arampacha/gpt-neo-therapist"

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load the causal LM and move it to `device`.
# Half precision is only requested when the model will actually run on a GPU:
# `device` may fall back to CPU, where float16 inference is poorly supported
# (missing/slow kernels), so full precision is used there instead.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
    low_cpu_mem_usage=False,
    use_auth_token=True
).to(device)

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=True)

Downloading:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/779k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.01M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/120 [00:00<?, ?B/s]

In [None]:
# Download `20200325_counsel_chat.csv` from the counsel-chat GitHub repository (master branch)
# into the current working directory.
!wget https://raw.githubusercontent.com/nbertagnolli/counsel-chat/master/data/20200325_counsel_chat.csv

--2022-03-30 19:32:22--  https://raw.githubusercontent.com/nbertagnolli/counsel-chat/master/data/20200325_counsel_chat.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3281269 (3.1M) [text/plain]
Saving to: ‘20200325_counsel_chat.csv’


2022-03-30 19:32:22 (49.1 MB/s) - ‘20200325_counsel_chat.csv’ saved [3281269/3281269]



In [None]:
# Read the CounselChat CSV and keep only the rows whose `split` column equals "test".
df = pd.read_csv("20200325_counsel_chat.csv")
df = df.loc[df["split"] == "test"]

In [None]:
import random

def generate_one(question:str=None, df=None, top_p:float=None, typical_p:float=None, max_length:int=400, print_result:bool=False):
    """Sample one therapist-style answer from the notebook's model.

    The prompt is built from `question` when it is given; otherwise a row is
    drawn uniformly at random from `df` and its title/text form the prompt.
    Uses the module-level `tokenizer`, `model` and `device`.

    Args:
        question: Question text to answer. When provided, `df` is not used.
        df: Table to sample a row from when `question` is None. The sampled
            row must expose `questionTitle`, `questionText` and `answerText`.
        top_p: Forwarded to `model.generate` as `top_p` (nucleus sampling).
        typical_p: Forwarded to `model.generate` as `typical_p` (typical sampling).
        max_length: Added to the prompt's token count to form the `max_length`
            passed to `model.generate`.
        print_result: When True, print the prompt, the generated answer and
            the reference answer.

    Returns:
        A dict with keys "prompt", "generated_answer" and "reference_answer".
        "reference_answer" is the sampled row's `answerText`, or "" when
        `question` was supplied.

    Raises:
        AssertionError: If neither `top_p` nor `typical_p` is set, if both
            `question` and `df` are None, or if `question` is not a `str`.
        ValueError: If a row has to be sampled but `df` is empty.
    """
    assert (top_p or typical_p), "Either `top_p` (nucleus) or `typical_p` (typical) is required."
    assert not ((question is None) and (df is None)), "Provide `question` or `df` to sample from."
    assert question is None or type(question) is str, "`question` must be a str."
    if question is None:
        if len(df) == 0:
            # Fail with a clear message instead of randint's "empty range" error.
            raise ValueError("`df` is empty; there is no row to sample from.")
        i = random.randint(0, len(df)-1)
        sample = df.iloc[i, :]
        prompt_text = f"Answer like a therapist:\n{sample.questionTitle} {sample.questionText}\nAnswer: "
        ref_answer = sample.answerText
    else:
        prompt_text = f"Answer like a therapist:\n{question}\nAnswer: "
        ref_answer=""
    
    prompt = tokenizer(prompt_text, return_tensors="pt")["input_ids"]

    # Pure sampling: top_k=0 disables top-k filtering, so only top_p / typical_p
    # shape the distribution. A single sequence is requested because only the
    # first one is ever decoded; the beam-search-only `early_stopping` flag is
    # not passed since no beams are used.
    outputs = model.generate(
        prompt.to(device),
        max_length=prompt.size(1) + max_length, 
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=top_p,
        typical_p=typical_p,
        top_k=0,
        temperature=1.,
        num_return_sequences=1,
        repetition_penalty=1.,
    )
    # Drop the prompt tokens so that only the continuation is decoded.
    generated_answer = tokenizer.decode(outputs[0, prompt.size(1):], skip_special_tokens=True).strip()
    if print_result:
        print("PROMPT:")
        print(prompt_text)
        print("\nGENERATED ANSWER:")
        print(generated_answer)
        print("\nREFERENCE ANSWER:")
        print(ref_answer)
    res = {
        "prompt": prompt_text,
        "generated_answer":generated_answer,
        "reference_answer":ref_answer
    }
    return res

In [None]:
# Nucleus (top-p) sampling: answer a randomly drawn row of `df` with top_p=0.92 and print the result.
res = generate_one(df=df, top_p=0.92, print_result=True)

PROMPT:
Answer like a therapist:
 Is it normal to go into therapy feeling nervous? I've gone to a couple therapy sessions so far and still everytime I walk in I get nervous and shaky. Is this normal? Should I still be feeling like this?
Answer: 

GENERATED ANSWER:
 Do not go to therapy unless you feel comfortable doing so. Therapy should not be used to avoid doing things you need to be able to manage yourself at home. The goal of therapy is to help you learn new coping skills and to help you become more self-sufficient and manage life in a healthier way. Therapy is not a magic pill that will give you the answer to everything that you may be feeling.

REFERENCE ANSWER:
Certainly.


In [None]:
# Typical sampling: answer a randomly drawn row of `df` with typical_p=0.8 and print the result.
res = generate_one(df=df, typical_p=0.8, print_result=True)

PROMPT:
Answer like a therapist:
 I am fearful of social situations and avoiding people all together This is preventing me from getting a job. I have bad relationship with my bipolar mother. I am living with her but it stresses me out so much to be around her that I have locked myself in my room. I have no friends or other family to help me. I have no money for therapy, but I think I need therapy before I can secure a job.
Answer: 

GENERATED ANSWER:
I'm sorry that you are feeling stressed out about the relationship with your mother. This is a difficult situation for everyone involved. I would encourage you to talk with a therapist who can help you work through your thoughts and feelings. I would also recommend checking out the "I am more motivated to succeed than I have ever been in my life" and "Beating self-doubt" pages on my website.

REFERENCE ANSWER:
A good first step would be to make an appointment with your primary care provider & discuss your concerns.  They would have resourc