# ImageforWeeds_Evaluation

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from transformers import pipeline

import warnings
# Silences every warning for the rest of the session (this includes pandas and
# transformers deprecation / chained-assignment notices).
warnings.filterwarnings("ignore")

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

# Show all columns when a DataFrame is displayed instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

In [2]:
# Load the data set; the first CSV column is used as the DataFrame index.
df = pd.read_csv(
    'ImageforWeeds.csv',
    index_col=[0],
)

In [3]:
# Label columns: a missing value (NaN) is replaced by 0 in all four columns at once.
# NOTE(review): presumably NaN means "aspect not mentioned" rather than "row not labelled" - confirm.
columns = ['cost','ease of use', 'effective', 'efficient']
df[columns] = df[columns].replace(np.nan, int(0))

## Sentiment Analysis

In [4]:
# `pipeline` is also imported in the first cell; the import is repeated so this cell runs on its own.
from transformers import pipeline
sentiment_pipeline = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

# One slot per row of df. Only the first 99 reviews are scored below; every other
# row (and any row without review text) keeps the None placeholder.
sentiment = [None] * len(df)
for index, sentence in enumerate(df['review_lower'].iloc[0:99]):
    # coarse progress indicator
    if index % 20 == 0:
        print(index)

    # A missing review (e.g. NaN) cannot be sliced or classified; leave its slot as None.
    if not isinstance(sentence, str):
        continue

    # The 512-character slice keeps inputs short, but characters are not tokens:
    # non-ASCII text can still exceed the model's 512-token limit, so the tokenizer
    # is also told to truncate explicitly.
    result = sentiment_pipeline(sentence[:512], truncation=True, max_length=512)[0]
    sentiment[index] = result['label']
df['sentiment_m'] = sentiment

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


0
20
40
60
80


## Zero-Shot classifier

In [5]:
# Zero-shot classification pipeline backed by the facebook/bart-large-mnli checkpoint.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [6]:
# Candidate labels handed to the zero-shot classifier (insert the labels you
# identified in the section above).
# NOTE: the name `type` shadows the Python builtin; it is kept because the
# zero-shot cell below reads this exact name.
type = [
    'cost',
    'efficient',
    'effective',
    'ease of use',
]

In [7]:
# Create one "<label>_m" score column per candidate label ("_m" = multiclass).
# The placeholder is the float -1.0 so that writing float scores into the column
# does not change its dtype; rows the loop below never reaches keep the placeholder.
for label in type:
    df[label + '_m'] = -1.0

# Positional index of each score column, looked up once for the writes below.
score_col_pos = {label: df.columns.get_loc(label + '_m') for label in type}

# Only the first 99 rows are classified. The bound is clamped to the frame length
# so a frame with fewer than 99 rows does not raise an IndexError.
n_rows = min(99, len(df))
for j in range(n_rows):
    # counter for progress/debugging
    if j % 20 == 0:
        print(j)

    # running the classifier on the review text of row j
    res = classifier(
        df['review_lower'].iloc[j],
        candidate_labels = type,
        multi_label = True
    )
    # Store each label's score with a single positional assignment. The earlier
    # `df[col].iloc[j] = value` form was chained indexing, which pandas does not
    # guarantee to write through to `df` (and never does under Copy-on-Write).
    for label, score in zip(res['labels'], res['scores']):
        df.iat[j, score_col_pos[label]] = score

0
20
40
60
80


## 1. Evaluate Sentiment

In [8]:
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score

In [9]:
def imputation1(list):
    """Encode sentiment labels as integers.

    Parameters
    ----------
    list : iterable
        Sentiment labels (e.g. a Python list or a pandas Series). The parameter
        name shadows the builtin but is kept so existing calls keep working.

    Returns
    -------
    list of int
        1 for "positive", 0 for "neutral" and -1 for anything else, including
        None, NaN and other non-string values.
    """
    codes = {"positive": 1, "neutral": 0}
    encoded = []
    for prediction in list:
        # Match case-insensitively and ignore surrounding whitespace, so both
        # "Positive" and "positive" are recognised instead of the lowercase
        # spelling silently falling through to -1.
        key = prediction.strip().lower() if isinstance(prediction, str) else None
        encoded.append(codes.get(key, -1))
    return encoded

In [10]:
# Encode the `sentiment` column and the pipeline output `sentiment_m` as -1/0/1.
# The columns are overwritten in place, so this cell is not idempotent: running it
# a second time re-encodes the integers and turns every value into -1.
for sentiment_col in ('sentiment', 'sentiment_m'):
    df[sentiment_col] = imputation1(df[sentiment_col])

In [11]:
# Per-class precision/recall/F1 of the predicted sentiment against the `sentiment` column.
print(classification_report(y_true=df['sentiment'], y_pred=df['sentiment_m']))

              precision    recall  f1-score   support

          -1       1.00      0.81      0.90       400
           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0

    accuracy                           0.81       400
   macro avg       0.33      0.27      0.30       400
weighted avg       1.00      0.81      0.90       400



## 2. Evaluate Zero-shot

In [12]:
def imputation(list, threshold=0.7):
    """Binarise classifier scores.

    Parameters
    ----------
    list : iterable of float
        Scores to threshold (e.g. a Python list or a pandas Series). The
        parameter name shadows the builtin but is kept for existing callers.
    threshold : float, default 0.7
        Scores strictly below this value become 0; everything else becomes 1.

    Returns
    -------
    list of int
        One 0/1 value per input score, in input order.
    """
    # `0 if score < threshold else 1` (rather than `score >= threshold`) keeps the
    # original treatment of values that do not compare as smaller, such as NaN.
    return [0 if prediction < threshold else 1 for prediction in list]

In [13]:
# Replace each zero-shot score column by its 0/1 version from imputation().
# The raw scores are overwritten by this step.
for aspect in ('cost', 'efficient', 'effective', 'ease of use'):
    df[aspect + '_m'] = imputation(df[aspect + '_m'])

### i) cost

In [14]:
# Zero-shot "cost" predictions against the `cost` label column.
print(classification_report(y_true=df['cost'], y_pred=df['cost_m']))

              precision    recall  f1-score   support

         0.0       1.00      0.97      0.98       394
         1.0       0.31      0.83      0.45         6

    accuracy                           0.97       400
   macro avg       0.65      0.90      0.72       400
weighted avg       0.99      0.97      0.98       400



In [15]:
# NOTE: `cost_m` holds thresholded 0/1 predictions at this point, so this is the
# ROC AUC of the hard labels, not of the raw zero-shot scores.
ROC_AUC = roc_auc_score(y_true=df['cost'], y_score=df['cost_m'])
print(f'ROC AUC : {ROC_AUC:.4f}')

ROC AUC : 0.9027


### ii) efficient

In [16]:
# Zero-shot "efficient" predictions against the `efficient` label column.
print(classification_report(y_true=df['efficient'], y_pred=df['efficient_m']))

              precision    recall  f1-score   support

         0.0       0.96      0.90      0.93       359
         1.0       0.44      0.71      0.54        41

    accuracy                           0.88       400
   macro avg       0.70      0.80      0.74       400
weighted avg       0.91      0.88      0.89       400



In [17]:
# NOTE: `efficient_m` holds thresholded 0/1 predictions at this point, so this is
# the ROC AUC of the hard labels, not of the raw zero-shot scores.
ROC_AUC = roc_auc_score(y_true=df['efficient'], y_score=df['efficient_m'])
print(f'ROC AUC : {ROC_AUC:.4f}')

ROC AUC : 0.8021


### iii) effective

In [18]:
# Zero-shot "effective" predictions against the `effective` label column.
print(classification_report(y_true=df['effective'], y_pred=df['effective_m']))

              precision    recall  f1-score   support

         0.0       0.93      0.98      0.96       309
         1.0       0.93      0.76      0.84        91

    accuracy                           0.93       400
   macro avg       0.93      0.87      0.90       400
weighted avg       0.93      0.93      0.93       400



In [19]:
# NOTE: `effective_m` holds thresholded 0/1 predictions at this point, so this is
# the ROC AUC of the hard labels, not of the raw zero-shot scores.
ROC_AUC = roc_auc_score(y_true=df['effective'], y_score=df['effective_m'])
print(f'ROC AUC : {ROC_AUC:.4f}')

ROC AUC : 0.8710


### iv) ease of use

In [20]:
# Zero-shot "ease of use" predictions against the `ease of use` label column.
print(classification_report(y_true=df['ease of use'], y_pred=df['ease of use_m']))

              precision    recall  f1-score   support

         0.0       0.98      0.93      0.95       361
         1.0       0.55      0.79      0.65        39

    accuracy                           0.92       400
   macro avg       0.77      0.86      0.80       400
weighted avg       0.94      0.92      0.92       400



In [21]:
# NOTE: `ease of use_m` holds thresholded 0/1 predictions at this point, so this
# is the ROC AUC of the hard labels, not of the raw zero-shot scores.
ROC_AUC = roc_auc_score(y_true=df['ease of use'], y_score=df['ease of use_m'])
print(f'ROC AUC : {ROC_AUC:.4f}')

ROC AUC : 0.8628


## 3. Compare Zero-Shot against a Naive Classifier

In [22]:
def naive(list):
    """Constant baseline prediction derived from the mean of the labels.

    Parameters
    ----------
    list : sequence of numbers
        Label values (e.g. a Python list or a pandas Series).

    Returns
    -------
    list of int
        A list as long as the input, filled with 1 when the mean of the input
        is at least 0.5 and with 0 otherwise.
    """
    majority = 1 if np.mean(list) >= 0.5 else 0
    return [majority for _ in range(len(list))]

In [23]:
# Add one constant-prediction baseline column per label column.
for target_col, baseline_col in (
    ('cost', 'naive_cost'),
    ('efficient', 'naive_efficient'),
    ('effective', 'naive_effective'),
    ('ease of use', 'naive_easeofuse'),
):
    df[baseline_col] = naive(df[target_col])

In [24]:
# cost: constant baseline against the `cost` label column
print(classification_report(y_true=df['cost'], y_pred=df['naive_cost']))

              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99       394
         1.0       0.00      0.00      0.00         6

    accuracy                           0.98       400
   macro avg       0.49      0.50      0.50       400
weighted avg       0.97      0.98      0.98       400



In [25]:
# efficient: constant baseline against the `efficient` label column
print(classification_report(y_true=df['efficient'], y_pred=df['naive_efficient']))

              precision    recall  f1-score   support

         0.0       0.90      1.00      0.95       359
         1.0       0.00      0.00      0.00        41

    accuracy                           0.90       400
   macro avg       0.45      0.50      0.47       400
weighted avg       0.81      0.90      0.85       400



In [26]:
# effective: constant baseline against the `effective` label column
print(classification_report(y_true=df['effective'], y_pred=df['naive_effective']))

              precision    recall  f1-score   support

         0.0       0.77      1.00      0.87       309
         1.0       0.00      0.00      0.00        91

    accuracy                           0.77       400
   macro avg       0.39      0.50      0.44       400
weighted avg       0.60      0.77      0.67       400



In [27]:
# ease of use: constant baseline against the `ease of use` label column
print(classification_report(y_true=df['ease of use'], y_pred=df['naive_easeofuse']))

              precision    recall  f1-score   support

         0.0       0.90      1.00      0.95       361
         1.0       0.00      0.00      0.00        39

    accuracy                           0.90       400
   macro avg       0.45      0.50      0.47       400
weighted avg       0.81      0.90      0.86       400



In [28]:
# Write the frame, including the prediction and baseline columns added above, to disk.
df.to_csv(path_or_buf='ImageforWeeds_evaluation.csv')