In [2]:
import requests

In [3]:
import json
import os
import subprocess

from dotenv import load_dotenv

# Load environment variable

In [4]:
# Pull the variables declared in the local .env file into os.environ.
load_dotenv()

key_var_name = "MODEL_ENDPOINT"
uri = os.getenv(key_var_name)
# A missing variable and an empty one are equally unusable as an endpoint URL,
# so both are rejected here instead of failing later inside the HTTP call.
if not uri:
    raise Exception('Could not find your endpoint in .env file')


# Functions

In [5]:
def get_response(texts,uri,timeout=30):
    """POST ``texts`` to the model endpoint and return the raw HTTP response.

    Args:
        texts: list of strings to score; serialized with ``build_json``.
        uri: URL of the scoring endpoint.
        timeout: seconds to wait for the server before ``requests`` raises
            ``requests.exceptions.Timeout``. Pass ``None`` to wait
            indefinitely.

    Returns:
        The ``requests.Response`` object; callers decode it with ``.json()``.
    """
    payload = build_json(texts)
    headers = {'Content-Type': 'application/json'}
    # requests never times out on its own, so an unresponsive endpoint would
    # otherwise block the notebook forever.
    return requests.post(uri, data=payload, headers=headers, timeout=timeout)

In [6]:
def build_json(texts):
    '''Serialize texts into the JSON body sent to the endpoint.

    texts must be an iterable of strings. Each string is keyed by its
    1-based position; json.dumps writes those integer keys as strings, so
    ["a", "b"] becomes '{"text": {"1": "a", "2": "b"}}'.
    '''
    dict_text = dict(enumerate(texts, start=1))
    return json.dumps({"text": dict_text})

In [7]:
def show_response(texts,uri):
    """Print each text next to the prediction the endpoint returned for it.

    Args:
        texts: list of strings to score.
        uri: URL of the scoring endpoint.

    The endpoint is queried once; its decoded JSON body is paired with
    ``texts`` by position.
    """
    response = get_response(texts, uri)
    # Decode the response already in hand rather than posting the payload again.
    for text, pred in zip(texts, response.json()):
        print(f'{text}--->{pred}')
    

In [8]:
# Sample sentences passed to show_response in the next cell as a quick check of the endpoint.
texts=["lot of love","love you mom","I hate you guys"]

In [9]:
# Send the sample texts to the endpoint and print one "text--->prediction" line per text.
show_response(texts,uri)

lot of love--->[0.8921286463737488]
love you mom--->[0.9433186054229736]
I hate you guys--->[0.02920299768447876]


# Web interface

In [None]:
# `!streamlit run ...` keeps this cell busy until the server is stopped, so no
# later cell executes on "Run All". Launch the server as a background process
# instead; stop it with streamlit_proc.terminate() when done.
streamlit_proc = subprocess.Popen(["streamlit", "run", "web_interface_st.py"])

In [11]:
from azureml.core import Workspace, Dataset

# Read the subscription ID from the environment (a line in the .env file loaded
# by load_dotenv() works too) so the real value never has to be written into
# the notebook. The placeholder is kept as the fallback when the variable is unset.
subscription_id = os.getenv('AZURE_SUBSCRIPTION_ID', 'your_subscription_ID')
resource_group = 'OC-P7'
workspace_name = 'P7_ML'

workspace = Workspace(subscription_id, resource_group, workspace_name)

# Fetch the registered 'tweets_common' dataset and materialize it as a DataFrame.
dataset = Dataset.get_by_name(workspace, name='tweets_common')
data = dataset.to_pandas_dataframe()

# Performance on common dataset


In [12]:
def multiple_response(texts,uri):
    """Query the endpoint once per text and collect the decoded predictions.

    Args:
        texts: iterable of strings; each one is sent in its own request.
        uri: URL of the scoring endpoint.

    Returns:
        A list with one entry per text: the decoded JSON body of the response,
        or ``[[nan]]`` when a ``ValueError`` is raised for that text.
    """
    def _predict_single(text):
        # A ValueError yields a NaN placeholder instead of aborting the batch.
        try:
            return get_response([text], uri).json()
        except ValueError:
            return [[np.nan]]

    return [_predict_single(text) for text in texts]
        
        

In [16]:
import numpy as np

In [17]:
# multiple_response sends one HTTP request per row of data['text'], so the
# run time of this cell grows with the number of rows.
preds = multiple_response(data['text'].values,uri)

In [18]:
# Every entry of preds is a nested one-value list ([[score]] or the [[nan]]
# fallback). reshape(-1) flattens them to one score per row and, unlike
# squeeze(), still yields a 1-D array when there is only a single row.
data['pred'] = np.array(preds).reshape(-1)

In [19]:
# Binarize the score at 0.5 in one vectorized comparison. NaN scores compare
# False and therefore map to 0, as they did with the row-by-row version.
data['pred_bin'] = (data['pred'] >= 0.5).astype(int)

In [21]:
# dropna() with default arguments removes every row that has a missing value in
# any column; this includes rows whose 'pred' is the NaN placeholder written by
# multiple_response for failed requests.
# NOTE(review): rows with NaN in other columns are dropped too - consider
# dropna(subset=['pred']) if only failed predictions should be excluded.
data_clean = data.dropna()

In [23]:
data_clean

Unnamed: 0,Column1,target,text,pred,pred_bin
0,1516037,1,"Wow, its later than I feel, better wrap up ano...",0.698310,1
1,589923,0,@lemonissimo I think the reason I twitted so m...,0.423505,0
2,213819,0,@GericaQuinn ahhhhh! dude u suck! lmao. jk! bu...,0.618300,1
3,10047,0,...aaaand there goes that great day RIP Mrs W...,0.221471,0
4,1330460,1,another morning joe free morning ahhhh ... sun...,0.913709,1
...,...,...,...,...,...
1595,622620,0,Damn packing trumps Xsport,0.041511,0
1596,232219,0,bout 2 call it a NIGHT... madd TIRED..gotta he...,0.196376,0
1597,1368574,1,"@IneffableNothin I love Pandora, but I am real...",0.553902,1
1598,441029,0,@simoncurtis wish i could,0.034489,0


## Confusion matrix

In [24]:
from sklearn.metrics import confusion_matrix

In [25]:
# First argument is the true label, second the predicted one: rows of the
# result are true classes (0, 1) and columns are predicted classes.
confusion_matrix(data_clean['target'],data_clean['pred_bin'])

array([[608, 172],
       [151, 598]], dtype=int64)

## Error analysis (False Positive)

In [27]:
# False positives: rows whose true target is 0 but whose binarized prediction is 1.
data_fp = data_clean.query("target == 0 and pred_bin == 1")

In [210]:
data_fp.describe()

Unnamed: 0,Column1,target,pred,pred_bin
count,143.0,143.0,143.0,143.0
mean,380886.132867,0.0,0.702013,1.0
std,223907.752977,0.0,0.128907,0.0
min,7767.0,0.0,0.504429,1.0
25%,197434.5,0.0,0.598127,1.0
50%,404004.0,0.0,0.687112,1.0
75%,566146.0,0.0,0.798584,1.0
max,797757.0,0.0,0.979371,1.0


In [215]:
# Write the text of the false positives to false_positive.csv (relative path,
# so it is created in the current working directory).
data_fp['text'].to_csv('false_positive.csv')

In [26]:
data_fp['text'].iloc[0]

NameError: name 'data_fp' is not defined

In [28]:
data_fp['text'].iloc[1]

" that's all i have to say"

In [29]:
data_fp['text'].iloc[2]

'@shell2323 Be on my recent @replies and me know you, *laughs* I would have put you, but I forgotted. '

In [30]:
data_fp['text'].iloc[3]

"it`s all just a short dream. never see that comimg. it turned my smile  :'("

In [31]:
data_fp['text'].iloc[4]

"@lindslou10  Yeah I'm special sometimes. AND I agree, it is way to early to be up on a Saturday morning.  "

In [32]:
data_fp['text'].iloc[5]

'Ayoo my tooth is hurtin twitter!!!!  need liqz 2 make it better ;)'

In [33]:
data_fp['text'].iloc[6]

'I think dinner might be nearly ready '

In [34]:
data_fp['text'].iloc[7]

'@taebunny In like 2 weeks  All of it is coming from China'

In [35]:
data_fp['text'].iloc[8]

"@Galvinatwindows Thanks Fred but I won't be able to make it today. Too much work on  Have fun!"

In [36]:
data_fp['text'].iloc[9]

'@stolenrain I thought I did reply though   Your birthday message?'

In [37]:
data_fp['text'].iloc[10]

'Bout to deck my orientee. '

In [38]:
data_fp['text'].iloc[11]

'miley and demi please reply '

In [39]:
data_fp['text'].iloc[12]

'Finds twitter really wierd, nowhere near as good as facebook '

In [40]:
data_fp['text'].iloc[13]

'@zyber17 yeah '