# NLI for ABSA (Natural Language Inference for Aspect-Based Sentiment Analysis)

In [2]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Zero-shot pipelines that have already been loaded, keyed by model name.
# Loading the checkpoint is the most expensive step, so it happens once per
# model rather than once per call (this function is applied row by row).
_NLI_PIPELINE_CACHE = {}


def _get_nli_pipeline(model_name):
    """Return the zero-shot-classification pipeline for `model_name`, loading it on first use."""
    if model_name not in _NLI_PIPELINE_CACHE:
        # transformers pipelines take 0 for the first GPU and -1 for CPU
        device = 0 if torch.cuda.is_available() else -1

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)

        # The model is deliberately NOT moved with model.to(device): -1 is not
        # a valid torch device index, so that call fails on CPU-only machines.
        # The pipeline places the model on `device` itself.
        _NLI_PIPELINE_CACHE[model_name] = pipeline(
            "zero-shot-classification", model=model, tokenizer=tokenizer, device=device
        )
    return _NLI_PIPELINE_CACHE[model_name]


def extract_sentiment_expression_nli(review, aspects, model_name='facebook/bart-large-mnli'):
    """Predict a 'positive' or 'negative' sentiment for each aspect of a review.

    For every aspect, two NLI hypotheses are built ("The sentiment for the
    aspect '<aspect>' is positive" / "... is negative") and scored against the
    review text with a zero-shot-classification pipeline; the label of the
    highest-scoring hypothesis is kept.

    Args:
        review: The review text used as the NLI premise.
        aspects: Iterable of aspect names to evaluate.
        model_name: Hugging Face model identifier of the NLI checkpoint.

    Returns:
        dict mapping each aspect to 'positive' or 'negative'. An empty dict is
        returned, without loading the model, when there are no aspects or when
        `review` is not a non-blank string (e.g. a NaN cell from a DataFrame).
    """
    # Possible sentiment labels
    candidate_labels = ['positive', 'negative']

    aspect_list = list(aspects) if aspects is not None else []
    if not aspect_list or not isinstance(review, str) or not review.strip():
        return {}

    nli_pipeline = _get_nli_pipeline(model_name)

    answers = {}
    for aspect in aspect_list:
        # One complete hypothesis sentence per candidate label
        hypotheses = [
            f"The sentiment for the aspect '{aspect}' is {label}" for label in candidate_labels
        ]

        response = nli_pipeline(
            sequences=review,             # Premise: the review text
            candidate_labels=hypotheses,  # Hypotheses
            # The hypotheses are already full sentences. Without this, the
            # pipeline wraps each one in its default template
            # ("This example is {}."), producing a malformed hypothesis.
            hypothesis_template="{}",
        )

        # Labels come back sorted by score; the last word of the best
        # hypothesis is the sentiment label ('positive' or 'negative').
        answers[aspect] = response['labels'][0].split()[-1]

    return answers

# Demo: a single review that mentions two aspects with opposite sentiment
aspects = ["quality", "price"]
review = "The quality is good but the price is expensive."

# Classify every listed aspect of the review, then show the resulting mapping
sentiment_expressions = extract_sentiment_expression_nli(review=review, aspects=aspects)
print(f"Review: '{review}' => Sentiment Expressions: {sentiment_expressions}")


  attn_output = torch.nn.functional.scaled_dot_product_attention(


Review: 'The quality is good but the price is expensive.' => Sentiment Expressions: {'quality': 'positive', 'price': 'negative'}


In [3]:
import pandas as pd

# Load the spreadsheet of predicted labels and preview its first five rows
file_path = 'predicted_labels_v4df_mar-24.xlsx'
df = pd.read_excel(io=file_path)
df.head(n=5)

Unnamed: 0,Survey ID,Product Name,Print Customer Region,LTR,Source Type,Survey language,Review Source,Star Rating,Product Family,Supplies Family,...,max_predicted_probability,max_predicted_aspect,0,Aspect 1,Aspect 2,Aspect 3,Aspect 4,Output Labels,Label Vectors,Predicted Labels
0,101618952,HP 63 Black Original Ink Cartridge,US,10,Web Reviews,English,Walmart,5.0,Supplies - Ink,Dolmen Refresh,...,0.954531,0,,Customer Service,,Price,,"['Customer Service', 'Price']","[0, 0, 1, 1]","['Price', 'Customer Service']"
1,110317001,HP 63XL High Yield Black Original Ink Cartridge,US,10,Web Reviews,English,HP US,5.0,Supplies - Ink,Dolmen Refresh,...,0.766199,Delivery,,,Delivery,,,['Delivery'],"[1, 0, 0, 0]",['Delivery']
2,113652950,HP 63 Black Original Ink Cartridge,US,10,Web Reviews,English,Walmart,5.0,Supplies - Ink,Dolmen Refresh,...,0.809284,Price,,,,Price,,['Price'],"[0, 0, 1, 0]",['Price']
3,117483634,HP 65XL Tri-color Original Ink Cartridge,US,10,Web Reviews,English,Walmart,5.0,Supplies - Ink,Dolmen Refresh,...,0.780703,Price,,,,Price,,['Price'],"[0, 0, 1, 0]",['Price']
4,118156397,HP 951XL 3-pack High Yield Cyan/Magenta/Yellow...,US,10,Web Reviews,English,Walmart,5.0,Supplies - Ink,Nesta+,...,0.807181,Delivery,,,Delivery,,,['Delivery'],"[1, 0, 0, 0]",['Delivery']


In [4]:
import ast

# Parse the 'Predicted Labels' column from its string representation
# (e.g. "['Price', 'Customer Service']") into real Python lists.
# Only string cells are parsed, so re-running this cell after the column has
# already been converted (or hitting an empty/NaN cell) does not raise.
df['Predicted Labels'] = df['Predicted Labels'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [None]:
# Applying the function to the DataFrame
def process_dataframe(df):
    """Attach aspect lists and their predicted sentiments to `df`.

    Two columns are added to `df` in place, and the same object is returned:
      - 'Aspect List': the values of 'Predicted Labels', copied over unchanged.
      - 'Sentiment Expressions': the result of extract_sentiment_expression_nli
        for each row's 'Combined Text' and 'Aspect List'.
    """
    def _row_sentiments(row):
        # One extraction call per row: the review text plus that row's aspects
        return extract_sentiment_expression_nli(row['Combined Text'], row['Aspect List'])

    # 'Predicted Labels' is used as-is for the aspect list
    df['Aspect List'] = df['Predicted Labels']
    df['Sentiment Expressions'] = df.apply(_row_sentiments, axis=1)

    return df

# Run the per-row sentiment extraction over df
df_new = process_dataframe(df)

# Save the new DataFrame to an Excel file. The path is defined once so the
# confirmation message always names the file that was actually written.
processed_output_path = 'processed_sentiment_data_v4df_Apr-24_NLI.xlsx'
df_new.to_excel(processed_output_path, index=False)

print(f"Data saved to '{processed_output_path}'")


In [None]:
# Function to process the sentiment expression and assign the label
def process_sentiment_label(sentiment):
    """Map a sentiment expression to 'Negative', 'Positive' or 'Neutral'.

    Matching is a case-insensitive substring search, so both the lowercase
    labels produced by the NLI step ('positive' / 'negative') and capitalised
    free-text expressions ('Not Satisfied', 'Concerns about ...') are handled.

    Args:
        sentiment: The sentiment expression. Non-string values (e.g. NaN or
            None) are converted with str() and normally end up as 'Neutral'.

    Returns:
        'Negative' if any negative keyword occurs, otherwise 'Positive' if any
        positive keyword occurs, otherwise 'Neutral'.
    """
    # Negative keywords are checked first because 'satisfied' is a substring of
    # 'not satisfied', 'unsatisfied' and 'dissatisfied'. The last two are listed
    # explicitly so case-insensitive matching does not label them 'Positive'.
    negative_keywords = ('negative', 'concern', 'not satisfied', 'unsatisfied', 'dissatisfied', 'mixed')
    positive_keywords = ('positive', 'satisfied')

    text = str(sentiment).casefold()

    if any(keyword in text for keyword in negative_keywords):
        return 'Negative'
    if any(keyword in text for keyword in positive_keywords):
        return 'Positive'
    return 'Neutral'

# Function to expand rows for each aspect and sentiment
def expand_rows_for_aspects(df):
    """Expand each row into one row per (aspect, sentiment) pair.

    For every row whose 'Sentiment Expressions' value is a non-empty dict, one
    copy of the row is produced per dict entry, with two extra columns:
    'Aspect' (the dict key) and 'Predicted Sentiment' (the dict value passed
    through process_sentiment_label).

    Rows without a usable sentiment dict get no 'Predicted Sentiment' here, so
    the final filter drops them unless the input already carried a non-blank
    value in that column.

    Args:
        df: DataFrame with a 'Sentiment Expressions' column.

    Returns:
        A new DataFrame of expanded rows. The original index labels are kept,
        so they repeat for rows with several aspects. If nothing could be
        expanded, an empty DataFrame with the input columns plus 'Aspect' and
        'Predicted Sentiment' is returned.

    Raises:
        KeyError: if a non-empty `df` has no 'Sentiment Expressions' column.
    """
    expanded_rows = []

    # Iterate through each row in the DataFrame
    for _, row in df.iterrows():
        # Expected to be a dict, e.g. {'Price': 'Positive', 'Customer Service': 'Negative'}
        sentiments = row['Sentiment Expressions']

        if isinstance(sentiments, dict) and sentiments:
            # One output row per aspect in the sentiment dictionary
            for aspect, sentiment_expression in sentiments.items():
                new_row = row.copy()  # Do not modify the caller's row
                new_row['Aspect'] = aspect
                new_row['Predicted Sentiment'] = process_sentiment_label(sentiment_expression)
                expanded_rows.append(new_row)
        else:
            # Kept as-is; the filter below decides whether the row survives
            expanded_rows.append(row)

    # Create a new DataFrame from the expanded rows
    expanded_df = pd.DataFrame(expanded_rows)

    if 'Predicted Sentiment' not in expanded_df.columns:
        # Empty input, or no row had a non-empty sentiment dict: the column was
        # never created, so filtering on it would raise KeyError. Return an
        # empty frame with the expected columns instead.
        empty_df = df.iloc[0:0].copy()
        for column in ('Aspect', 'Predicted Sentiment'):
            if column not in empty_df.columns:
                empty_df[column] = pd.Series(dtype=object)
        return empty_df

    # Filter out rows where 'Predicted Sentiment' is blank or NaN
    predicted = expanded_df['Predicted Sentiment']
    expanded_df = expanded_df[predicted.notna() & (predicted != '')]

    return expanded_df


# 'df_new' has a 'Sentiment Expressions' column whose values are dicts mapping aspect -> sentiment
df_expanded = expand_rows_for_aspects(df_new)

# Save the expanded DataFrame to an Excel file. The path is defined once so the
# confirmation message always names the file that was actually written.
expanded_output_path = 'expanded_sentiment_data_apr_NLI-24.xlsx'
df_expanded.to_excel(expanded_output_path, index=False)

print(f"Expanded data saved to '{expanded_output_path}'")