# Zero-shot classification

## [DeBerta-v3-base-mnli-fever-anli](https://huggingface.co/MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli?candidateLabels=covid-19%2C+vaccine+efficacy%2C+vaccine+side+effects%2C+measles%2C+cholera%2C+home+remedies%2C+conspiracy&multiClass=true&text=actor+jamie+foxx+reportedly+par+lyzed+and+bl+nd+due+to+covid+19+vaccine+american+actor+jamie+foxx+has+reportedly+s+ffered+partial+par+lysis+following+a+covid+19+vaccine+complication+the+vaccine+according+to+hollywood+journalist+a+j+benza+resulted+in+a+bl+d+clot+in+his+brain+that+led+to+him+being+partially+par+lyzed+and+bl+nd+in+addition+to+a+series+of+other+complications+benza+claims+the+news+was+confirmed+by+a+source+close+to+foxx+jamie+had+a+bl+d+clot+in+his+brain+after+he+got+the+sh+t+he+did+not+want+the+sh+t+but+the+movie+he+was+on+he+was+pressured+to+get+it+confessed+the+journalist+who+formally+worked+as+a+columnist+for+the+new+york+daily+news+during+an+appearance+on+dr+drew+pinsky+s+online+show+ask+dr+drew+the+bl+d+clot+in+the+brain+caused+him+at+that+point+to+be+partially+par+lyzed+and+bl+nd+benza+alleged+as+he+insisted+his+insider+was+someone+in+the+room+with+first+hand+knowledge+of+foxx+s+hospitalization+foxx+was+admitted+to+the+hospital+after+s+ffering+a+medical+complication+on+thursday+april+11+according+to+his+daughter+corinne+foxx+luckily+due+to+quick+action+and+great+care+he+is+already+on+his+way+to+recovery+we+know+how+beloved+he+is+and+appreciate+your+prayers+the+family+asks+for+privacy+during+this+time+she+said+the+55+year+old+s+medical+woes+began+while+filming+his+latest+movie+back+in+action+the+movie+also+starring+cameron+diaz+is+said+to+have+moved+forward+with+production+without+foxx+using+a+body+double+in+his+place)

In [0]:
from transformers import pipeline
# Build the zero-shot classification pipeline used by every cell below.
classifier = pipeline(
    task="zero-shot-classification",
    model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
)

In [0]:
import pandas as pd
# Lift the limit on how many columns pandas shows when displaying a DataFrame.
pd.options.display.max_columns = None

### Example

In [0]:
# Smoke test: score one sentence against a handful of unrelated labels.
candidate_labels = ["hello kitty", "dog", "entertainment", "environment"]
sequence_to_classify = "Angela Merkel is a politician in Germany and leader of the CDU"

# multi_label=True asks the pipeline to score each label independently.
output = classifier(
    sequence_to_classify,
    candidate_labels=candidate_labels,
    multi_label=True,
)
print(output)


### Read in data

In [0]:
# Read the data export "amp.csv" (path relative to the working directory) into a DataFrame.
# Later cells use its "text", "themeName" and "issueName" columns.
df = pd.read_csv("amp.csv")

In [0]:
# Candidate labels for themes: the distinct, non-missing theme names in the data.
# Series.unique() keeps NaN as a value of its own, which would otherwise be passed
# to the classifier as a bogus candidate label, so missing values are dropped first.
theme_labels = df['themeName'].dropna().unique()

# Candidate labels for issues, built the same way.
issue_labels = df['issueName'].dropna().unique()

In [0]:
# Display the theme labels as the cell output.
theme_labels

## Write functions to classify text and return a DataFrame

In [0]:
def classify_themes(df, labels=None, top_k=3, model=None):
    """Predict the leading themes for each text in ``df``.

    Every value of ``df['text']`` is scored against the candidate labels with
    ``multi_label=True``; the first ``top_k`` entries of the returned labels
    and scores are kept for each text.

    Args:
        df: DataFrame with a ``text`` column holding the sequences to classify.
        labels: Candidate theme labels. Defaults to the module-level
            ``theme_labels``.
        top_k: Number of leading labels/scores to keep per text (default 3).
        model: Callable invoked as ``model(text, labels, multi_label=True)``
            that returns a mapping with ``sequence``, ``labels`` and
            ``scores`` keys. Defaults to the module-level ``classifier``.

    Returns:
        DataFrame with one row per input row and the columns ``sequence``,
        ``predictedThemes`` (list of labels) and ``themeScores`` (list of
        scores). An empty input yields an empty frame with these columns.
    """
    # Resolve the defaults at call time so the notebook globals are used
    # when the caller does not override them.
    if labels is None:
        labels = theme_labels
    if model is None:
        model = classifier

    columns = ['sequence', 'predictedThemes', 'themeScores']
    records = []
    for text_sequence in df['text']:
        result = model(text_sequence, labels, multi_label=True)
        records.append({
            'sequence': result['sequence'],
            'predictedThemes': result['labels'][:top_k],
            'themeScores': result['scores'][:top_k],
        })
    # Passing the columns explicitly keeps the schema stable for empty input,
    # where selecting the columns afterwards would raise KeyError.
    return pd.DataFrame(records, columns=columns)

In [0]:
def classify_issues(df, labels=None, model=None):
    """Predict a single issue for each text in ``df``.

    Every value of ``df['text']`` is scored against the candidate labels with
    ``multi_label=False``; only the first label and score of each result are
    kept.

    Args:
        df: DataFrame with a ``text`` column holding the sequences to classify.
        labels: Candidate issue labels. Defaults to the module-level
            ``issue_labels``.
        model: Callable invoked as ``model(text, labels, multi_label=False)``
            that returns a mapping with ``sequence``, ``labels`` and
            ``scores`` keys. Defaults to the module-level ``classifier``.

    Returns:
        DataFrame with one row per input row and the columns ``sequence``,
        ``predictedIssues`` (one label) and ``issueScores`` (one score).
        An empty input yields an empty frame with these columns.
    """
    # Resolve the defaults at call time so the notebook globals are used
    # when the caller does not override them.
    if labels is None:
        labels = issue_labels
    if model is None:
        model = classifier

    columns = ['sequence', 'predictedIssues', 'issueScores']
    records = []
    for text_sequence in df['text']:
        result = model(text_sequence, labels, multi_label=False)
        records.append({
            'sequence': result['sequence'],
            'predictedIssues': result['labels'][0],
            'issueScores': result['scores'][0],
        })
    # Passing the columns explicitly keeps the schema stable for empty input,
    # where selecting the columns afterwards would raise KeyError.
    return pd.DataFrame(records, columns=columns)

In [0]:
# Display the column names of the loaded data.
df.columns

In [0]:
# predict themes: run classify_themes over df and keep the returned frame
# (columns: sequence, predictedThemes, themeScores)
themes_df = classify_themes(df)

In [0]:
# predict issues: run classify_issues over df and keep the returned frame
# (columns: sequence, predictedIssues, issueScores)
issues_df = classify_issues(df)

In [0]:
# combine results: attach each text's theme predictions to its issue prediction.
# themes_df is reduced to one row per sequence first; otherwise a text that
# occurs k times in the data would match k theme rows for each of its k issue
# rows and the left join would emit k*k rows for it.
# NOTE(review): this assumes identical texts receive identical predictions.
results_df = issues_df.merge(
    themes_df.drop_duplicates(subset="sequence"),
    how="left",
    on=["sequence"],
)

In [0]:
# write the combined predictions to "predicted_amp.csv" without the index column
# NOTE(review): predictedThemes/themeScores hold lists; presumably they are
# written as their string representation — confirm this suits downstream readers.
results_df.to_csv("predicted_amp.csv", index = False)

## Combine with the original DataFrame

In [0]:
# Attach the predictions to the original rows by matching "text" to "sequence".
# results_df is reduced to one row per sequence so that a text occurring several
# times does not multiply the rows of df in the left join.
# NOTE(review): this assumes identical texts receive identical predictions.
df = df.merge(
    results_df.drop_duplicates(subset="sequence"),
    how="left",
    left_on="text",
    right_on="sequence",
)

In [0]:
# Preview the first rows of the combined frame.
df.head()

In [0]:
# Rows whose labelled issue is not equal to the predicted issue.
mismatch = df.loc[df["issueName"].ne(df["predictedIssues"])]

In [0]:
# Display (rows, columns) of the mismatch frame.
mismatch.shape

In [0]:
# Drop mismatches that have no labelled issue to compare against.
# Rebinding instead of inplace=True avoids mutating a frame that was derived
# from df by boolean indexing, which pandas flags with SettingWithCopyWarning.
mismatch = mismatch.dropna(subset=["issueName"])

In [0]:
# Display the text next to its labelled and predicted issue for each mismatch.
mismatch[["text", "issueName", "predictedIssues"]]

In [0]:
# Write the mismatching rows to "mismatch_issues.csv" without the index column.
mismatch.to_csv("mismatch_issues.csv", index = False)