In [1]:
import sys
if 'google.colab' in sys.modules:
    !pip install -Uqq transformers datasets

In [2]:
import random
import pandas as pd
from IPython.display import display, HTML

from transformers import AutoTokenizer
from datasets import Dataset, DatasetDict

# Get the data

In [3]:
!mkdir ./data && cd data && wget https://raw.githubusercontent.com/nbertagnolli/counsel-chat/master/data/20200325_counsel_chat.csv

mkdir: cannot create directory ‘./data’: File exists


In [4]:
!ls -hl data

total 6.1M
-rw-r--r-- 1 root root 3.2M Mar 28 11:04 20200325_counsel_chat.csv
-rw-r--r-- 1 root root 2.9M Mar 28 11:04 counselchat-data.csv


In [5]:
df = pd.read_csv("data/20200325_counsel_chat.csv", index_col=0)

In [6]:
n_qs = len(df.questionID.unique())
n_topics = len(df.topic.unique())
print(f"Total number of samples {df.shape[0]}, {n_qs} unique questions on {n_topics} topics")

Total number of samples 2129, 815 unique questions on 31 topics


In [7]:
# standardise spaces
df["questionTitle"] = df.questionTitle.map(lambda x: " ".join(x.split()))
df["questionText"] = df.questionText.map(lambda x: " ".join(x.split()))
df["answerText"] = df.answerText.map(lambda x: " ".join(x.split()))

df["prompt"] = "Answer like a therapist:\n" + df.questionTitle + df.questionText + "\nAnswer: "
df["fullText"] = df.prompt + df.answerText
df.head(3)

Unnamed: 0,questionID,questionTitle,questionText,questionLink,topic,therapistInfo,therapistURL,answerText,upvotes,views,split,prompt,fullText
0,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Sherry Katz, LCSWCouples and Family Therapist,...",https://counselchat.com/therapists/sherry-katz...,"If everyone thinks you're worthless, then mayb...",1,2899,train,Answer like a therapist:\nCan I change my feel...,Answer like a therapist:\nCan I change my feel...
1,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Robin Landwehr, DBH, LPCC, NCCMental Health in...",https://counselchat.com/therapists/robin-landw...,"Hello, and thank you for your question and see...",1,3514,train,Answer like a therapist:\nCan I change my feel...,Answer like a therapist:\nCan I change my feel...
2,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,Lee KingI use an integrative approach to treat...,https://counselchat.com/therapists/lee-king,First thing I'd suggest is getting the sleep y...,0,5,train,Answer like a therapist:\nCan I change my feel...,Answer like a therapist:\nCan I change my feel...


Let's compute prompt and answer length length in tokens:

In [8]:
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

def get_length(text):
    return len(tokenizer(text)["input_ids"])

df["prompt_length"] = df.prompt.map(get_length)
df["answer_length"] = df.answerText.map(get_length)
df["full_length"] = df.fullText.map(get_length)

In [9]:
df.describe()

Unnamed: 0,questionID,upvotes,views,prompt_length,answer_length,full_length
count,2129.0,2129.0,2129.0,2129.0,2129.0,2129.0
mean,346.854861,0.489901,198.604979,84.918271,204.780648,288.648661
std,273.706241,0.942429,300.31428,55.57665,151.817316,165.082355
min,0.0,0.0,2.0,23.0,2.0,50.0
25%,78.0,0.0,58.0,53.0,106.0,178.0
50%,321.0,0.0,107.0,75.0,164.0,248.0
75%,588.0,1.0,210.0,102.0,252.0,342.0
max,884.0,9.0,3514.0,668.0,1108.0,1208.0


In [10]:
for name, group in df.groupby("split"):
    print(f"{name} split contains {len(group)} samples ({len(group.questionID.unique())} unique questions)")

    # bla = group[["questionTitle", "questionID"]].groupby(["questionTitle"]).count()

test split contains 117 samples (39 unique questions)
train split contains 1839 samples (695 unique questions)
val split contains 173 samples (81 unique questions)


In [11]:
df.rename(columns={"answerText":"answer"}, inplace=True)
dataset = DatasetDict(**{
    k: Dataset.from_pandas(df.loc[df.split==k,["prompt", "answer", "topic"]]) for k in df.split.unique()
})

In [12]:
import random
import pandas as pd
from IPython.display import display, HTML
from pprint import pprint

def display_examples(dataset, num_examples=5, mode="pprint"):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."
    idx = random.sample(list(range(len(dataset))), num_examples)
    texts = [f'{sample["prompt"]}, {sample["answer"]}' for sample in dataset.select(idx)]
    df = pd.DataFrame({"text":texts})
    
    if mode=="df":
        display(HTML(df.to_html()))
    elif mode=="pprint":
        for text in texts:
            pprint(text)
            print()
    else:
        raise ValueError(f"{mode} mode is not supported. Please select one of ['df' 'pprint']")

In [13]:
display_examples(dataset["train"], mode="df")

Unnamed: 0,text
0,"Answer like a therapist:\nWhy do I always push my boyfriend away?My boyfriend shows affection, but I just push him away. Every time my boyfriend tries to kiss, hug, or touch me I almost always push him away. I'm on birth control and it has killed my sex drive. I love him so much. Why do I do this?\nAnswer: , I sympathize with you! It is actually quite common for one partner to have a higher sex drive than the other, and can lead to discord in the relationship. The good news is, there are ways to deal with this.You may have already answered your question. There are many medications that can alter one's sex drive. If your birth control pills are the cause of this, talk to your doctor about switching to a different brand. You may even consider alternate methods for birth control, such as condoms.However, there are some other things to ask yourself. First, you used the term ""affection"" but then talked about sex. They're not the same thing. There are different kinds of intimacy, and many different ways to show love for someone. A book you might find useful is ""The Five Love Languages"".Defining what ""affection"" means to you, and to him, and discovering all the ways you both can show this towards each other, may help lessen some of your anxiety over this.Humans are hard-wired to need appropriate physical touch. In fact, studies have shown that newborns/premies who can receive skin-on-skin contact with their parent can recover faster than babies who don't receive this touch. It's called Kangaroo Care. Children who have grown up in neglectful environments where they're not hugged and kissed and shown appropriate physical contact can often be unable to show this to their partners and children when they're adults. One question to ask yourself is, were you having intimacy issues before starting the medication? If so, there may be other, deeper issues at play here.For someone who has received the wrong kind of sexual/physical touch (sexual abuse, physical beatings) it can be difficult to allow someone to be physically close to you. A therapist could help you work through this.Sometimes, a person may lose their sexual feelings for their partner due to issues in the relationship. Once those issues are resolved, the sex drive comes back. Examine your relationship and see if either of you are unhappy, if you've been arguing a lot, or if there are any problems in your relationship that need to be addressed.Finally, look for the other ways that the two of you may be intimate in a non-sexual way. You may be doing things for each other that you don't even realize, and noticing these will help you appreciate it more. Take the stress off of the sexual gratification and focus on showing your love in other ways. Some people like to give gifts, do the other person's chores, or do other things they think the person will appreciate.If, after all of this, you still have a discrepancy with one of you having a higher sex drive, there are many articles online that offer advice for couples dealing with this situation that you may find useful. It is a very common issue for a lot of couples."
1,"Answer like a therapist:\nHow does counseling end?How does a counselor decide when to end counseling sessions or to terminate working with a client?\nAnswer: , There are typically three reasons why therapy is terminated:1) Client has met therapy goals2) Client is not progressing 3) Therapist is not a good fit for clientIn order to properly assess whether therapy is helping and what progress is being made, the therapist needs to have ways of consistently checking in with clients sessions-by-session to determine what is helping, what isn't, and where the client is at in relation to their original therapy goals. When a client has met their goals, that is a good time to end counselling sessions unless the client has new goals or simply wants to check-in periodically to make sure that they are still on track (sometimes referred to as relapse prevention). When a client is not progressing, and feedback has been taken and attempts have been made to make the therapy more helpful for the client but to no avail, than it is considered unethical to continue to work with the client. In these circumstances, referring out to another therapist who may be a better fit is a good idea."
2,"Answer like a therapist:\nI think my daughter is stressing too muchWhen my daughter is stressed about a silly thing from school, she starts crying and freaking out. She is a bright student, always has a 4.0, but I am afraid she is stressing too much. I’m afraid it’s going to break her. I don't know if I should get her to a doctor or someone because this is not normal.\nAnswer: , Oh dear - this is becoming all too common. I suggest you have some conversations with your girl to figure out where she is getting the ""information"" that she builds anxiety around. I see teens who have heard teachers, parents, coaches, etc push the ""highly selective colleges"" idea on kids way too much. Teens in particular are prone to black and white thinking, leading them to see one poor test performance, one off day, one not-great assignment as the make-or-break task on the path to successful adulthood. If that's the case with your daughter, she needs you to help modulate both the incoming information (there are nearly 5000 colleges in the US - she will get into one of them!) as well as her reaction to that information.As well, you and any other adults that are involved in parenting her need to discern fact from fiction. The recent scandal around celebrities ""buying"" their kids into college is an indicator of how far off track we, as a culture, have gotten in terms of education. A college education is certainly important, but we have allowed the marketing push to create so much stress and upset for our kids (and ourselves as parents) that they are actually less prepared to leave home and really engage in higher education than ever.You may want to include a therapist in these conversations if you get stuck, but start with a parent-child conversation. Help her figure out what is going on - if it is the ""I have to get it right or I will have no future"", calmly walk her through all the evidence that says that's just not true. As well, let her know that being anxious now is NOT preparing her for a bright adulthood - it's preparing her for an anxious future. She needs help figuring out how to balance her effort to attend to real and reasonable expectations, not over-inflated fear-based actions."
3,"Answer like a therapist:\nHow can I learn to be content and at peace?I recently broke up with my boyfriend After being together for over 4 years. I truly believe I did the right thing, but omehow being single now has made me very needy and unsure of myself. I am seeing someone casually and I am losing my mind with anxiety about how he feels about me and this need for validation. How can I learn to be content and at peace with myself regardless of my career, relationship status, etc?\nAnswer: , Moving on from a long term relationship can be difficult. Sometimes we can lose a sense of who we are because in a way we have become defined by the relationship itself. So, when we begin to move on and see other people, our sense of identity may be obscured. The tendency is to seek out validation to give us a feeling of worthiness. That it's ok to move on and be happy. This is a relatively normal reaction. It's important to treat yourself with kindness and compassion through this time in your life. Mindful self-compassion can be a great way to begin the process of getting that sense of who you are back and what you want in a relationship. I think its important to look back, learn what you can from the previous relationship, and become a better person for it in the future. At the same time, taking the necessary time to nuture yourself on a mind, body, and spirit level. Self-compassion allows you to turn inward and except all facets of who you are. It improves your capacity for love, vulnerability, emotional openness, social interconnectedness, and self-respect. My philosophy is holistic, collaborative, and accepting. The cornerstone of my treatment philosophy is mindfulness and other evidenced based integrative modalities. I work as your ally to help you cultivate abundant self-compassion in your life, so that you can have the types of relationships you want."
4,"Answer like a therapist:\nI want to be a boy but I can't because of my religionI was born a girl, but I want to be a boy. Because of my religion I can't tell my family. I know they won't accept me. What do I do?\nAnswer: , First of all, I want to say, I am so sorry you are not feeling accepted by your family. I know how isolating and lonely this can be. The most important step you can take right now is building a community of supportive people who do accept you. Creating your own sense of community is very powerful for helping you love yourself. If you can find a trans support or LGBTQ support group in your area, I recommend seeking that out right away through your local LGBTQ center or PFLAG. If you don’t have access to that, I recommend calling Trans Lifeline US: 877-565-8860 Canada: 877-330-6366 https://www.translifeline.org/. You can talk to other trans-identified people anonymously for support, calling them from wherever you feel safe. This is a great way to begin to connect with other people who have similar experiences to you. Next, think of this time in your life as your time to explore your gender identity, just for yourself. Make room to explore you gender identity in ways that are private and comfortable for you. Consider reading a book like: https://www.newharbinger.com/queer-and-transgender-resilience-workbook to explore who you are and build resilience. Also, consider learning about other religions that are accepting of LGBTQ folks for another perspective - there are many out there! Once you feel you have a strong support system outside of your family and a positive sense of self-love (which can take time, be patient, don’t rush it), then you can consider what action steps you want to take with your family. If you are still living with your family or financially depend on them, having other supports in place first is very important. It’s a very personal choice how you want to navigate your family relationships, talk it through in-depth with a trusted friend, other trans folks, or therapist to help you decide what’s right for you.And remember, there are tons of people out there who will love and accept you. We are rooting for you!In solidarity,-Lindsey"


In [14]:
display_examples(dataset["train"])

('Answer like a therapist:\n'
 "I'm scared to tell my family that I'm bisexualMy parents seem okay with "
 'other sexualities, but normally they only talk about being gay. When they do '
 "talk about bisexuality, they say things like “they'll do anything” or things "
 "that make me very uncomfortable because I am bisexual. I don't know if I am "
 'ready to come out to them.\n'
 'Answer: , Coming out to family members can cause a lot of anxiety. However, '
 'although I cannot promise what their reaction will be, the benefit is that '
 'you will no longer have to hide who you are. Perhaps researching or getting '
 'information on bisexuality can help your family understand what it truly is '
 'to be bisexual. Many times, people are just not educated on certain things.')

('Answer like a therapist:\n'
 'Is it okay for my girlfriend to have sex with other men since I can’t '
 'sexually perform?I am currently suffering from erectile dysfunction and have '
 'tried Viagra, Cialis, etc. Nothin

Note: "prompt" and "answer" are kept as separate fields because it will be handy later for proper labeling. Tokens corresponding to "prompt" will be ignored at loss computation as we are interested in generating answers only.

In [15]:
sample = dataset["train"][0]
tokenizer(sample["prompt"], sample["answer"], return_token_type_ids=True)

{'input_ids': [33706, 588, 257, 24636, 25, 198, 6090, 314, 1487, 616, 4203, 286, 852, 28063, 284, 2506, 30, 40, 1101, 1016, 832, 617, 1243, 351, 616, 7666, 290, 3589, 13, 314, 8523, 3993, 290, 314, 466, 2147, 475, 892, 546, 703, 314, 1101, 28063, 290, 703, 314, 6584, 470, 307, 994, 13, 314, 1053, 1239, 3088, 393, 39496, 7341, 13, 314, 1053, 1464, 2227, 284, 4259, 616, 2428, 11, 475, 314, 1239, 651, 1088, 284, 340, 13, 1374, 460, 314, 1487, 616, 4203, 286, 852, 28063, 284, 2506, 30, 198, 33706, 25, 220, 1532, 2506, 6834, 345, 821, 28063, 11, 788, 3863, 345, 761, 284, 1064, 649, 661, 284, 8181, 503, 351, 13, 42338, 11, 262, 1919, 4732, 287, 543, 257, 1048, 3160, 318, 257, 1263, 4588, 287, 2116, 12, 31869, 13, 48059, 11, 345, 460, 467, 2835, 290, 2835, 2111, 284, 1833, 1521, 345, 821, 407, 28063, 11, 788, 467, 736, 284, 262, 976, 4315, 290, 307, 13642, 866, 757, 13, 1858, 389, 867, 40840, 6218, 345, 460, 1064, 287, 1919, 2056, 13, 6674, 1100, 617, 286, 262, 3392, 543, 1181, 326, 645, 1048