In [1]:
from bertviz import head_view
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import random
import re
import os

In [2]:
def get_questions(path):
    '''
        Read the question text of every post stored under ``path``.

        ``path`` is walked recursively. Each ``context.txt`` found is split on
        the literal ``<br>`` marker: field 0 is taken as the post title and
        field 2 as the post body. The question text of a post is the last
        three non-empty body fragments (the body is split on ``? . ! ;``),
        joined with spaces, with newlines and tabs removed, followed by a
        space and the title.

        Args:
            path: directory to search for ``context.txt`` files.

        Returns:
            dict mapping the name of the directory that holds each
            ``context.txt`` (used as the post id) to its question text.

        Raises:
            IndexError: if a ``context.txt`` has fewer than three
                ``<br>``-separated fields.
    '''

    questions = {}
    for root, _dirs, files in os.walk(path):
        if 'context.txt' not in files:
            continue

        # Explicit encoding so the result does not depend on the platform
        # default; assumes the raw files are UTF-8 -- TODO confirm.
        with open(os.path.join(root, 'context.txt'), encoding='utf-8') as f:
            q_full = f.read().split('<br>')

        q_text = q_full[2]
        q_title = q_full[0]

        # Keep only the last three non-empty fragments of the body.
        fragments = [x for x in re.split("[?.!;]", q_text) if x != ""]
        q_context = ' '.join(fragments[-3:])
        # Strip off newline and tab characters
        q_context = q_context.replace('\n', '').replace('\t', '')

        # basename() honours the platform path separator, unlike split('/').
        questions[os.path.basename(root)] = q_context + " " + q_title

    return questions

In [3]:
def find_context_full(ind, df):
    '''
    Build the context of one sentence: all other sentences of the same reply.

    ``ind`` is split on '-' into (post id, reply id, sentence number). Rows of
    ``df`` whose 'Post.ID' and 'Reply.ID' equal the first two pieces and whose
    'Sent.Num' differs from the third are selected, and their 'Sentence'
    values are joined with single spaces in row order. Returns '' when no
    row matches.
    '''

    parts = ind.split('-')

    same_post = df['Post.ID'] == parts[0]
    same_reply = df['Reply.ID'] == parts[1]
    other_sentence = df['Sent.Num'] != parts[2]

    siblings = df.loc[same_post & same_reply & other_sentence, 'Sentence']
    return " ".join(siblings.values.tolist())

In [4]:
def find_reply(ind, df):
    '''
    Return the whole reply that a sentence belongs to, as one string.

    ``ind`` is split on '-'; the first two pieces are the post id and the
    reply id. The 'Sentence' values of every row of ``df`` with that
    'Post.ID' and 'Reply.ID' (the sentence itself included) are joined with
    single spaces in row order. Returns '' when no row matches.
    '''

    parts = ind.split('-')
    in_reply = (df['Post.ID'] == parts[0]) & (df['Reply.ID'] == parts[1])

    return " ".join(df.loc[in_reply, 'Sentence'].values.tolist())

In [5]:
%%javascript
// Register RequireJS path aliases so that the module names `d3` and `jquery`
// resolve to the CDN-hosted builds listed below (d3 3.4.8, jQuery 2.0.0).
// The URLs are protocol-relative (`//host/...`).
// NOTE(review): presumably needed by the bertviz head_view rendering used
// later in this notebook -- confirm against the installed bertviz version.
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min',
      jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
  }
});

<IPython.core.display.Javascript object>

In [6]:
# Seed used for every RNG in this notebook. It has no influence on a model
# that was trained on a separate machine.
seed = 20

In [7]:
# Set random seeds for reproducibility on a specific machine.
# cuDNN: turn off the benchmark autotuner and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
# Seed every generator the notebook can draw from: PyTorch on CPU, PyTorch on
# all CUDA devices (not only the current one), Python's `random`, and NumPy's
# global generator.
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
random.seed(seed)
np.random.seed(seed)

RandomState(MT19937) at 0x1A47502678

In [8]:
# Show full cell contents when frames are displayed (no column-width limit).
pd.set_option('display.max_colwidth', None)

# Dataset name; appended to the raw-data directory path below.
data = 'needadvice'

# Prediction file; tab-separated despite the .csv extension (see sep='\t' below).
pred_file = '../../preds/classifier_needadvice_bert_dropout:0.1_lr_tr:1e-05_lr_cl:1e-05_wd:0_batch:32_finetune:True_query:True_context:False_seed:20_multigpu:True_labels:ds_frac:1.csv'

dev = pd.read_csv(pred_file, header=0, sep='\t')
# Plain list / array copies of individual columns, taken before the dtype casts below.
dev_sentences = dev['Sentence'].tolist()
dev_labels_DS = dev['DS_Label'].values
dev_labels_Maj = dev['Majority_label'].values
# Normalise dtypes. Reply.ID and Sent.Num become strings because
# find_context_full compares them with the '-'-separated pieces of the ID.
dev['Pred_Label'] = dev['Pred_Label'].astype(int)
# NOTE(review): after the int cast, Reply.ID can never equal a non-integer
# reply piece of an ID (e.g. '1.1' in 'c3v94q-1.1-0'), so find_context_full
# finds no rows for such IDs and their Context is '' -- confirm this is intended.
dev['Reply.ID'] = dev['Reply.ID'].astype(int).astype(str)
dev['Sent.Num'] = dev['Sent.Num'].astype(str)
# Taken after the int cast of Pred_Label above.
dev_preds = dev['Pred_Label'].values
# Question text per post; looked up by the first '-'-separated piece of each ID.
questions = get_questions('../../rawdata/' + data)
dev['Question'] = dev['ID'].apply(lambda x:questions[x.split('-')[0]])
# Context = the other sentences of the same reply. find_context_full filters
# the whole frame once per row.
dev['Context'] = dev['ID'].apply(lambda x: find_context_full(x, dev))

In [9]:
# Index the frame by sentence ID so rows can be addressed with dev.loc[<ID>, ...].
# Guarded so that re-running this cell is a no-op instead of a KeyError
# (after the first run 'ID' is the index and no longer a column).
if dev.index.name != 'ID':
    dev = dev.set_index('ID')

In [None]:
# Rows where DS_Label and Pred_Label are both 1 (true positives with respect to DS_Label).
true_positives = dev[(dev['DS_Label'] == 1) & (dev['Pred_Label'] == 1)]
# Cap the sample size: DataFrame.sample raises ValueError when asked for more
# rows than the frame holds.
true_positives.sample(min(50, len(true_positives)))

In [None]:
# Rows where DS_Label is 1 but Pred_Label is 0 (false negatives with respect to DS_Label).
false_negatives = dev[(dev['DS_Label'] == 1) & (dev['Pred_Label'] == 0)]
# Cap the sample size: DataFrame.sample raises ValueError when asked for more
# rows than the frame holds.
false_negatives.sample(min(50, len(false_negatives)))

In [13]:
# Display the ID-indexed frame (pandas abbreviates long frames when rendering).
dev

Unnamed: 0_level_0,Majority_label,DS_Label,Sentence,Flair,Post.ID,Reply.ID,Sent.Num,Question,Pred_Label,Context
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ah4b6r-1-1,1,1,It really helps to say it out loud AND write it down .,Education,ah4b6r,1,1,"[deleted] I have an exam coming up; and despite constant studying, for the life of me, some stuff just won't stick in my brain. Reddit; what is your advice?",0,Good luck ! Certified tutor . Then when you see the questions you will remember what you said and/ or wrote .
d3cklw-2-0,0,0,"Could you get a plant , it takes a lot of dedication and time and patience to grow a plant .",Mental Health,d3cklw,2,0,[removed] [Lonely and Depressed] In Mental Institution with No Chance of Leaving,0,"They need to be well looked after , you can even talk to plants , as crazy as it sounds they can grow healthier if you interact with them . Each different one has different needs and honestly they can be more of a challenge to care for . Once you can manage to keep a plant alive it ’s a sure sign that upon release you will be capable of looking after an animal . Added bonus ( most ) plants wo n't scratch you . The first time I had a root sprout off of a cutting I was super proud of my little plant . It 's fun to watch the systems change and is very rewarding ."
d3cklw-13-0,1,1,"Get a plant , an orchid or something you need to look after .",Mental Health,d3cklw,13,0,[removed] [Lonely and Depressed] In Mental Institution with No Chance of Leaving,1,
ad66g1-3-2,1,1,You need to rest and relax .,Life Decisions,ad66g1,3,2,[deleted] How do I manage my free time better?,1,Pushing yourself to be over - productive is a recipe for burnout . You do know that video editing is a job that people can get paid for ? Why do n't you take a video editing class so you can do that as a part of your coursework ? Have you thought about pursuing that as a career so you can do it as your job ? It 's alright to rest and relax .
cqwmxj-1-4,1,1,Take general studies for a year and see if anything pulls you or perhaps get a technical degree .,Life Decisions,cqwmxj,1,4,What are your experiences with college or getting a good job after high school I’m so stressed I’m sick Heellp I’m trying to figure out if I should go to college or just find a good job...,1,You can definitely find something without college but it sounds like you want to go either way . I think that is one of the most stressful fields . I think you need to know firstly that if you are sick stressed just trying to make the choice of college or not ; would emergency medicine even be a good career choice for you ? No matter your choice a quick internet search to find out if you need school to do what you want .
...,...,...,...,...,...,...,...,...,...,...
c3v94q-1.1-0,0,0,Thanks for the advice .,Friendships,c3v94q,1,0,[deleted] My Best Friend Is Ignoring Me?,0,
bzp12f-19-7,1,1,Save voicemails if possible .,Mental Health,bzp12f,19,7,"I’m just so scared, and I’m not even sure if this fear is rational I would just like to know what the next step is for getting out of this hole of depression, because I really do want to get better EDIT: I just went outside and took a walk, and then went on a bike ride (I’m in Japan rn) and I feel so much better, like my normal self I’m scared of death",1,"Take pictures and videos so you have them when you can no longer see them . I only just woke up from my sadness about 8 mos ago , about two years after he died . My biggest fear was losing my parents . You ’ll miss them when they ’re gone . Ask everything you want to . So my advice for you is , instead of dwelling , go spend time with them . After losing my dad , there are no words to explain the pain I went through . As much as possible . I knew it would devastate me and be terrible . Do n’t get annoyed with their random idiosyncrasies . Do n’t get irritated by them lecturing or giving advice ."
c8tmtz-14-0,0,0,12 is a perfectly fine age to get into new activities .,Education,c8tmtz,14,0,I’m pretty skinny and about average-ish height My stamina is terrible since I don’t do exercise so I’m not sure if I’ll be good enough to even start doing something like a physical activity Please help Is it too late to start a hobby/activity at 12?,0,"I took up triathlon around that age , you 're never to old . Start small ( walk and take breaks if you need ) and slowly build up the distance or speed . If you want to improve your fitness try running ."
c8th26-13-0,0,1,Someone is trying to get your attention .,Mental Health,c8th26,13,0,"[removed] Can't stop seeing 1:11, 2:22, 3:33, 4:44, 5:55, 10:10, 11:11, 12:12...",0,"Repeating numbers mean different things , google 1111 , or 222 and see if any of it applies to you :)"


In [22]:
# Collect (question, whole reply) pairs for posts whose question text carries
# a '[deleted]' or '[removed]' marker. There is one row per sentence, so the
# same reply can appear more than once.
delrem = dev.copy()
delrem['Reply'] = delrem.index.map(lambda x: find_reply(x, dev))
delrem = delrem.reset_index()
# Raw string: '\[' inside a normal string literal is an invalid escape
# sequence. The group is non-capturing so str.contains does not warn about
# match groups.
has_marker = delrem['Question'].str.contains(r'\[(?:deleted|removed)\]')
delrem = delrem.loc[has_marker, ['Question', 'Reply']]
# Cap the sample size so the cell still runs when fewer than 10 rows match.
delrem.sample(min(10, len(delrem)))

Unnamed: 0,Question,Reply
613,"[deleted] My ""friends"" might be inconsiderate jerks","I went through the exact same a couple months ago . Turns out they were gay lmao I confronted them about it twice but they did n’t seem to care , so I got new friends . They were my best friends for about 3 years already , but then they just started ignoring me and doing things behind my back just like your “ friends ” ."
342,[deleted] My Best Friend Is Ignoring Me?,"And how often would you say you get these attacks and call her to help you get over them?I think you should be in some kind of therapy . Do you have any other friends ? I have a few other friends be we are not as close . She might not want that kind of responsibility . So maybe after talking to her and seeing if she ’s upset about something , you might want to consider adding people to your social circle . I help her with her issues and she helps me . Its never really bad but it still helps talking to someone . I call her about one time a week for help . Thanks for your response . Idk if a 14 year old ( her ) is the best way of dealing with your situation . EDIT : Also , in general , it ’s not a good idea to depend on anyone too much ."
580,"[removed] Can't stop seeing 1:11, 2:22, 3:33, 4:44, 5:55, 10:10, 11:11, 12:12...","I was thinking of something along these lines . My gosh , ` : 37 ` is all you ’ll be able to remember seeing . You could also probably write an IFTTT to randomly vibrate your phone when the time is n't 1:11 , 2:22 , etc . Alternatively , because OP is obviously assigning value to these times and thus remembering them while forgetting others , perhaps he / she could purposefully pay attention to a different arbitrary pattern . If you want to throw it off , start setting alarms on your phone to be times that are n't 1:11 , 2:22 , etc . As others have said , it 's just confirmation bias . Or a step further , keeping a journal ... > 1:03 > 1:46 > 2:22 > 3:13 > 4:12“Oh , I see I ’m actually just misremembering.”Of course , I ’m no psychologist , and talking to someone qualified is better than following random internet advice . Imagine all the times the clock shows 37 minutes when you look !"
234,"[deleted] My ""friends"" might be inconsiderate jerks",There are people out there and it ’s hard and uncomfortable to make yourself vulnerable by venturing out on your own to find them but is it any more uncomfortable than being humiliated or left out ? I think you sound like a generous and kind person and maybe by staying around them you ’re preventing yourself from meeting people that would treat you like a human being . I think you should really consider how you ’d treat them in those situations and ask yourself if you would do any of that to them ... or to anyone ?
77,[removed] [Lonely and Depressed] In Mental Institution with No Chance of Leaving,"The wording you used earlier ( rough paraphrasing ) "" moving into my own place would make me happier "" I think you should shift your focus on being content with your current situation and things may improve for you on their own . Regardless , i hope things improve for you ! If you are interested in some book titles i will gladly tell you the ones that have helped me . I respect that it would be a depressing environment but i do n't think a pet or your own place will be the answer you are hoping for . Probably not an answer you are looking for but i d recomend reading philosophy ."
354,[deleted] Feels like I’m at a cross roads and would like a third persons perspective here,"Say something along the lines of "" I 'm excited for the chance to work at --- , but it would be a major change for me . How many days would there be between the interview and your diagnosis ? May I have a few days to consider your offer ? "" Unless they 're desperate for help they should at least agree to a couple of days . If it 's not long , when they give you the offer you could respectfully request a bit of time to make the decision ."
462,[deleted] My Best Friend Is Ignoring Me?,"Your friend might just not have the mental willpower to keep holding up the weight of your mental illnesses . When I was in high school , I had one friend that consistently came to me when she was having extreme mental issues and it placed an extremely heavy burden on me . I was stressed and felt like her mental health was my responsibility . I do n’t mean to be harsh , but that ’s just the reality of it . Thanks for the advice . It got to a point where I just told her I could n’t do it anymore . I suggest finding therapy , and maybe even apologizing to your friend for expecting her to be entirely responsible for your mental health when she is only a teenager . You need to find another way to cope . Its ok , I understand what you mean . I understand it ’s difficult , but you ca n’t drag others down with you ."
591,[removed] [Lonely and Depressed] In Mental Institution with No Chance of Leaving,"are you able to meet other people outside of the hospital that you can connect with ? I suggest a plant , volunteering at a shelter if you can , requesting a service animal or finding a pen pal- the real deal someone to write on paper and mail out . The act of physically writng and seeing another persons handwriting is theraputic for me and I love looking forward to getting letters!What sort of work are you doing ?"
552,[removed] [Lonely and Depressed] In Mental Institution with No Chance of Leaving,"Is there any sort of assisted living where you are ? Someone mentioned plants which are also great . If you have a job it sounds like you are leaving the hospital regularly . Yes , I agree with the idea of looking into a program that might be labeled as "" residential "" which requires you to live onsite and attend treatment regularly , but does not restrict you to a sterile and isolated medical facility . Is there the option for supervised trips off site ? Kind of like a stepping stone between inpatient and independence . That would likely be a good stepping stone especially considering mom has to sign off on the release . If so , you can see if volunteering at an animal shelter may be an option . Are you required to stay in the facility around the clock ? Pet therapy is the only thing that truly helped me ."
149,[removed] [Lonely and Depressed] In Mental Institution with No Chance of Leaving,"Is there any sort of assisted living where you are ? Someone mentioned plants which are also great . If you have a job it sounds like you are leaving the hospital regularly . Yes , I agree with the idea of looking into a program that might be labeled as "" residential "" which requires you to live onsite and attend treatment regularly , but does not restrict you to a sterile and isolated medical facility . Is there the option for supervised trips off site ? Kind of like a stepping stone between inpatient and independence . That would likely be a good stepping stone especially considering mom has to sign off on the release . If so , you can see if volunteering at an animal shelter may be an option . Are you required to stay in the facility around the clock ? Pet therapy is the only thing that truly helped me ."


## Model loading

In [None]:
# Directory holding the saved classifier and its tokenizer.
saved_model = '../../saved_pretrained/classifier_needadvice_bert_dropout:0.1_lr_tr:1e-05_lr_cl:1e-05_wd:0_batch:32_finetune:True_query:True_context:False_seed:20_multigpu:True_labels:ds_frac:1/'

tokenizer = AutoTokenizer.from_pretrained(saved_model)
# output_attentions=True makes the forward pass return the attention weights
# as its last output, which show_head_view reads via model(...)[-1]. The
# logits stay at index 0, so predict is unaffected.
model = AutoModelForSequenceClassification.from_pretrained(saved_model, output_attentions=True)

# Switch to evaluation mode; the trailing ';' keeps the notebook from
# printing the full model repr.
model.eval();

In [None]:
def predict(sequence):
    '''
        Classify a (sentence, question) pair with the loaded model.

        Args:
            sequence: pair of strings; element 0 is encoded as the first
                segment and element 1 as the second segment.

        Returns:
            int: index of the largest logit for the encoded pair. The raw
            logits are also printed.
    '''

    x = tokenizer.encode_plus(sequence[0], sequence[1], return_tensors='pt', add_special_tokens=True, return_token_type_ids=True)
    input_ids = x['input_ids']
    token_type_ids = x['token_type_ids']
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(input_ids, token_type_ids=token_type_ids)[0]
    print(logits)
    # A single pair is encoded, so the argmax over the last axis holds one value.
    return int(logits.argmax(dim=-1).item())

In [None]:
# Inspect one dev example: its sentence, its Context column and its Question column.
example_id = 'd3cklw-14-1'
print(
    dev.loc[example_id, 'Sentence'],
    "\nContext:", dev.loc[example_id, 'Context'],
    "\nQuestion:", dev.loc[example_id, 'Question'],
)

In [None]:
# (sentence, question) pair for the example row, in the order predict expects.
sequence = tuple(dev.loc['d3cklw-14-1', column] for column in ('Sentence', 'Question'))

In [None]:
# Run the classifier on the (sentence, question) pair; predict prints the raw logits.
predict(sequence)

## Attention Heads View

In [None]:
def show_head_view(model, tokenizer, sentence_a, sentence_b=None):
    '''
    Render the bertviz head view for one sentence or a sentence pair.

    The input is encoded with ``tokenizer.encode_plus`` and passed through
    ``model`` without gradients. The last element of the model output is
    handed to ``head_view`` as the attention.
    NOTE(review): this assumes the model is configured to return attentions
    (e.g. ``output_attentions=True``) so that the last output element is
    the attention weights -- verify for the loaded model.

    When ``sentence_b`` is truthy, the token type ids are also fed to the model
    and the position of the first token with type id 1 is passed to
    ``head_view`` as the start of sentence B; otherwise ``None`` is passed.
    '''
    encoded = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=True, return_token_type_ids=True)
    ids = encoded['input_ids']
    is_pair = bool(sentence_b)

    # Segment ids are only supplied to the model for a sentence pair.
    forward_kwargs = {'token_type_ids': encoded['token_type_ids']} if is_pair else {}
    with torch.no_grad():
        attention = model(ids, **forward_kwargs)[-1]

    if is_pair:
        sentence_b_start = forward_kwargs['token_type_ids'][0].tolist().index(1)
    else:
        sentence_b_start = None

    # Batch index 0: exactly one example was encoded.
    tokens = tokenizer.convert_ids_to_tokens(ids[0].tolist())
    head_view(attention, tokens, sentence_b_start)

In [None]:
# Inputs for the attention view. Note that `context` holds the row's Question
# text, not its 'Context' column.
example_row = 'd3cklw-14-1'
sentence = dev.loc[example_row, 'Sentence']
context = dev.loc[example_row, 'Question']

In [None]:
# Render the head view with the sentence as segment A and `context` as segment B.
show_head_view(model, tokenizer, sentence_a=sentence, sentence_b=context)

In [None]:
from sklearn.metrics import classification_report

# Text report comparing Pred_Label (predictions) against DS_Label (reference
# labels) over the whole dev frame.
report = classification_report(dev['DS_Label'], dev['Pred_Label'])
print(report)