In [1]:
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import torch
import spacy

In [2]:
# Checkpoint directory of the TANDA RoBERTa model; the tokenizer and the
# classifier are both loaded from this one location.
TANDA_CKPT_DIR = 'models/tanda_roberta_large_asnq_wikiqa/ckpt/'
tokenizer = RobertaTokenizer.from_pretrained(TANDA_CKPT_DIR)
model = RobertaForSequenceClassification.from_pretrained(TANDA_CKPT_DIR)
# Target label 1, shaped (1, 1): a batch holding a single example.
labels = torch.tensor([[1]])

# spaCy English pipeline, passed to divideToSentences for sentence splitting.
nlp = spacy.load('en_core_web_lg')

# Input table; the scoring loop reads its 'question', 'answer', 'qid' and
# 'aid' columns.
df = pd.read_csv('web-answers.csv')

In [3]:
# One slot per row of df; the scoring loop fills these in by position.
nb_rows = len(df)

tanda_loss = [0.0] * nb_rows             # loss of the whole answer
tanda_answer_sentence = [''] * nb_rows   # lowest-loss sentence of the answer
tanda_sentence_loss = [0.0] * nb_rows    # loss of that sentence
tanda_sentence_proba = [0.0] * nb_rows   # probability reported for that sentence

def getLossAndProbas(q, a, model, labels):
    """Score how well answer `a` responds to question `q`.

    The pair is encoded with the module-level `tokenizer`, run through `model`
    together with `labels`, and the first two model outputs are taken as the
    loss and the logits.

    Args:
        q: Question text.
        a: Candidate answer text.
        model: Callable that accepts the encoded inputs as keyword arguments
            plus `labels`, and returns a sequence starting with (loss, logits).
        labels: Target label tensor forwarded to `model`.

    Returns:
        A `(loss, proba)` pair: the loss as a NumPy value and the softmax
        probability of class index 1 for the first item of the batch.
        If encoding or inference raises an `Exception`, `(10, 0)` is returned
        instead (10 being the sentinel "worst" loss used by the caller).
    """
    try:
        inputs = tokenizer.encode_plus(q, a, add_special_tokens=True, return_tensors="pt")
        # Inference only: no gradients are needed, so skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model(**inputs, labels=labels)
        loss, logits = outputs[:2]
        # The logits are scores for competing classes, so they are normalised
        # jointly with softmax; an element-wise sigmoid would not yield class
        # probabilities that sum to 1.
        probas = torch.softmax(logits, dim=-1).detach().numpy()
        return loss.detach().numpy(), probas[0, 1]
    except Exception:
        # Best-effort fallback, presumably hit mostly by inputs longer than the
        # model accepts. Only Exception is caught so that Ctrl-C
        # (KeyboardInterrupt) can still stop a long run.
        print('too long answer')
        return 10, 0
def divideToSentences(doc, nlp):
    """Split `doc` into sentences using the pipeline `nlp`.

    Args:
        doc: Text to split.
        nlp: Callable (e.g. a loaded spaCy pipeline) that returns an object
            exposing `.sents`, an iterable of spans with a `.text` attribute.

    Returns:
        A list with the whitespace-stripped text of each sentence, or an empty
        list if processing raises an `Exception` (best-effort behaviour).
    """
    try:
        parsed = nlp(doc)
        # `Span.text` is used instead of `Span.string`: the latter no longer
        # exists in spaCy 3, where the AttributeError would be swallowed below
        # and every document would silently yield no sentences.
        return [sent.text.strip() for sent in parsed.sents]
    except Exception:
        # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        return []

In [4]:
# Score every (question, answer) row: once for the full answer, then once per
# sentence of the answer, keeping the sentence with the lowest loss.
#
# The result lists are indexed by position (`pos`), not by the DataFrame index
# label (`i`), so the loop stays correct even if df's index is not 0..n-1.
# `i` is kept only for the progress message.
for pos, (i, r) in enumerate(df.iterrows()):
    print('{}: Working on Question: {} - qid: {} - aid: {}'.format(i, r['question'], r['qid'], r['aid']))

    # getLossAndProbas already falls back to (10, 0) when scoring fails, so no
    # extra try/except is needed here (a bare one would also swallow Ctrl-C).
    tanda_loss[pos], _ = getLossAndProbas(r['question'], r['answer'], model, labels)

    # Defaults when no sentence scores below the sentinel loss of 10. The
    # probability default is the scalar 0, matching getLossAndProbas' fallback,
    # so the output column never mixes lists with numbers.
    best_sentence, best_loss, best_proba = '', 10, 0
    for sen in divideToSentences(r['answer'], nlp):
        sen_loss, sen_proba = getLossAndProbas(r['question'], sen, model, labels)
        if sen_loss < best_loss:
            best_sentence, best_loss, best_proba = sen, sen_loss, sen_proba

    tanda_answer_sentence[pos] = best_sentence
    tanda_sentence_loss[pos] = best_loss
    tanda_sentence_proba[pos] = best_proba

df['tanda_loss'] = tanda_loss
df['tanda_answer_sentence'] = tanda_answer_sentence
df['tanda_sentence_loss'] = tanda_sentence_loss
df['tanda_sentence_proba'] = tanda_sentence_proba

df.to_csv('tanda-web-results.csv')


0: Working on Question: c# this in method parameter - qid: 29080 - aid: 50386
1: Working on Question: c# this in method parameter - qid: 29080 - aid: 50387
2: Working on Question: c# this in method parameter - qid: 29080 - aid: 50388
3: Working on Question: cluster plot in r - qid: 29081 - aid: 50393
too long answer
4: Working on Question: cluster plot in r - qid: 29081 - aid: 50397
5: Working on Question: poisson regression vs linear regression - qid: 29082 - aid: 50400
6: Working on Question: poisson regression vs linear regression - qid: 29082 - aid: 50401
7: Working on Question: poisson regression vs linear regression - qid: 29082 - aid: 50402
8: Working on Question: poisson regression vs linear regression - qid: 29082 - aid: 50403
9: Working on Question: poisson regression vs linear regression - qid: 29082 - aid: 50408
10: Working on Question: r text sentiment analysis - qid: 29083 - aid: 50410
11: Working on Question: r text sentiment analysis - qid: 29083 - aid: 50411
12: Workin

103: Working on Question: declare arrays python - qid: 29097 - aid: 50550
104: Working on Question: declare arrays python - qid: 29097 - aid: 50551
105: Working on Question: declare arrays python - qid: 29097 - aid: 50552
106: Working on Question: declare arrays python - qid: 29097 - aid: 50553
107: Working on Question: declare arrays python - qid: 29097 - aid: 50554
108: Working on Question: declare arrays python - qid: 29097 - aid: 50555
109: Working on Question: declare arrays python - qid: 29097 - aid: 50556
110: Working on Question: declare arrays python - qid: 29097 - aid: 50557
111: Working on Question: declare arrays python - qid: 29097 - aid: 50558
112: Working on Question: declare arrays python - qid: 29097 - aid: 50559
113: Working on Question: multinomial regression analysis - qid: 29098 - aid: 50563
114: Working on Question: multinomial regression analysis - qid: 29098 - aid: 50567
115: Working on Question: python knn code - qid: 29099 - aid: 50573
116: Working on Question

205: Working on Question: negative words to positive words - qid: 29117 - aid: 50758
too long answer
206: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50760
207: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50761
208: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50762
209: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50763
210: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50764
211: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50765
212: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50766
too long answer
213: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50767
214: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50768
215: Working on Question: numpy array apply lambda - qid: 29118 - aid: 50769
216: Working on Question: synonyms for the word more - qid: 29119 - aid: 50774
217: Working on Question: synonyms

309: Working on Question: where does pip3 install to - qid: 29139 - aid: 50968
310: Working on Question: pca for dimensionality reduction - qid: 29140 - aid: 50970
311: Working on Question: np array astype - qid: 29141 - aid: 50989
too long answer
312: Working on Question: linear association - qid: 29143 - aid: 51001
313: Working on Question: python default values - qid: 29144 - aid: 51017
314: Working on Question: linear equations step by step - qid: 29145 - aid: 51026
315: Working on Question: linear equations step by step - qid: 29145 - aid: 51028
316: Working on Question: linear equations step by step - qid: 29145 - aid: 51029
317: Working on Question: python gaussian mixture model - qid: 29146 - aid: 51031
318: Working on Question: python gaussian mixture model - qid: 29146 - aid: 51033
319: Working on Question: python gaussian mixture model - qid: 29146 - aid: 51034
320: Working on Question: python gaussian mixture model - qid: 29146 - aid: 51035
321: Working on Question: python 

414: Working on Question: csr matrix python - qid: 29171 - aid: 51288
415: Working on Question: csr matrix python - qid: 29171 - aid: 51289
416: Working on Question: quadratic fit formula - qid: 29172 - aid: 51294
417: Working on Question: quadratic fit formula - qid: 29172 - aid: 51295
418: Working on Question: quadratic fit formula - qid: 29172 - aid: 51297
419: Working on Question: quadratic fit formula - qid: 29172 - aid: 51298
420: Working on Question: performance review metrics - qid: 29173 - aid: 51301
421: Working on Question: performance review metrics - qid: 29173 - aid: 51302
422: Working on Question: performance review metrics - qid: 29173 - aid: 51303
423: Working on Question: performance review metrics - qid: 29173 - aid: 51309
424: Working on Question: python calculate mean of list - qid: 29175 - aid: 51322
425: Working on Question: python calculate mean of list - qid: 29175 - aid: 51323
426: Working on Question: python calculate mean of list - qid: 29175 - aid: 51324
42

514: Working on Question: good databases to use - qid: 29190 - aid: 51472
515: Working on Question: discrete random variable calculator - qid: 29191 - aid: 51475
516: Working on Question: discrete random variable calculator - qid: 29191 - aid: 51476
517: Working on Question: discrete random variable calculator - qid: 29191 - aid: 51477
518: Working on Question: discrete random variable calculator - qid: 29191 - aid: 51478
519: Working on Question: discrete random variable calculator - qid: 29191 - aid: 51481
520: Working on Question: discrete random variable calculator - qid: 29191 - aid: 51482
521: Working on Question: python cross validation code - qid: 29192 - aid: 51487
522: Working on Question: python cross validation code - qid: 29192 - aid: 51491
523: Working on Question: r poisson regression example - qid: 29193 - aid: 51493
524: Working on Question: r poisson regression example - qid: 29193 - aid: 51494
525: Working on Question: r poisson regression example - qid: 29193 - aid:

612: Working on Question: communication model diagram - qid: 29211 - aid: 51673
613: Working on Question: communication model diagram - qid: 29211 - aid: 51674
614: Working on Question: communication model diagram - qid: 29211 - aid: 51675
615: Working on Question: communication model diagram - qid: 29211 - aid: 51676
616: Working on Question: communication model diagram - qid: 29211 - aid: 51677
617: Working on Question: communication model diagram - qid: 29211 - aid: 51678
618: Working on Question: communication model diagram - qid: 29211 - aid: 51679
619: Working on Question: communication model diagram - qid: 29211 - aid: 51680
620: Working on Question: communication model diagram - qid: 29211 - aid: 51681
621: Working on Question: communication model diagram - qid: 29211 - aid: 51682
622: Working on Question: examples of nominal variables statistics - qid: 29212 - aid: 51685
623: Working on Question: examples of nominal variables statistics - qid: 29212 - aid: 51686
624: Working o

713: Working on Question: 2 sample test statistic calculator - qid: 29239 - aid: 51955
714: Working on Question: 2 sample test statistic calculator - qid: 29239 - aid: 51957
715: Working on Question: 2 sample test statistic calculator - qid: 29239 - aid: 51958
716: Working on Question: 2 sample test statistic calculator - qid: 29239 - aid: 51960
717: Working on Question: 2 sample test statistic calculator - qid: 29239 - aid: 51961
718: Working on Question: 2 sample test statistic calculator - qid: 29239 - aid: 51962
719: Working on Question: nominal data analysis - qid: 29240 - aid: 51964
720: Working on Question: nominal data analysis - qid: 29240 - aid: 51965
721: Working on Question: nominal data analysis - qid: 29240 - aid: 51966
722: Working on Question: nominal data analysis - qid: 29240 - aid: 51967
723: Working on Question: nominal data analysis - qid: 29240 - aid: 51968
724: Working on Question: nominal data analysis - qid: 29240 - aid: 51969
725: Working on Question: nominal 

815: Working on Question: segmentation analysis in marketing - qid: 29258 - aid: 52143
816: Working on Question: segmentation analysis in marketing - qid: 29258 - aid: 52144
817: Working on Question: segmentation analysis in marketing - qid: 29258 - aid: 52147
818: Working on Question: python show all variables - qid: 29259 - aid: 52148
819: Working on Question: python show all variables - qid: 29259 - aid: 52149
820: Working on Question: python show all variables - qid: 29259 - aid: 52150
821: Working on Question: python show all variables - qid: 29259 - aid: 52151
822: Working on Question: python show all variables - qid: 29259 - aid: 52152
823: Working on Question: python show all variables - qid: 29259 - aid: 52153
824: Working on Question: python show all variables - qid: 29259 - aid: 52154
825: Working on Question: python show all variables - qid: 29259 - aid: 52155
826: Working on Question: python show all variables - qid: 29259 - aid: 52156
827: Working on Question: python show

917: Working on Question: tensorflow print tensor - qid: 29277 - aid: 52332
918: Working on Question: tensorflow print tensor - qid: 29277 - aid: 52333
919: Working on Question: tensorflow print tensor - qid: 29277 - aid: 52334
920: Working on Question: tensorflow print tensor - qid: 29277 - aid: 52335
921: Working on Question: tensorflow print tensor - qid: 29277 - aid: 52336
922: Working on Question: tensorflow print tensor - qid: 29277 - aid: 52337
923: Working on Question: xgboost python sklearn - qid: 29278 - aid: 52339
too long answer
924: Working on Question: xgboost python sklearn - qid: 29278 - aid: 52340
925: Working on Question: xgboost python sklearn - qid: 29278 - aid: 52341
926: Working on Question: xgboost python sklearn - qid: 29278 - aid: 52342
927: Working on Question: xgboost python sklearn - qid: 29278 - aid: 52343
928: Working on Question: xgboost python sklearn - qid: 29278 - aid: 52344
929: Working on Question: xgboost python sklearn - qid: 29278 - aid: 52345
930