# 1. Perprocess
## 1.1 parse XML
use BeautifulSoup to parse XML file, output with pandas dataframe "sentence, E#A, sentiment"

In [70]:
def parseXML(path):
    with open(path) as xmldata:
        soup = BeautifulSoup(xmldata, "xml")
    # Create empty lists to store the extracted data
    sentence_ids = []
    texts = []
    categories = []
    polarities = []
    # Loop through the 'sentence' elements and extract the necessary information
    for sentence in soup.find_all('sentence'):
        opinions = sentence.find('Opinions')
        if opinions is not None:
            s_categories = []
            s_polarities = []
            for opinion in opinions.find_all('Opinion'):
                s_categories.append(opinion['category'])
                s_polarities.append(opinion['polarity'])
            categories.append(s_categories)
            polarities.append(s_polarities)
        else:
            continue
        sentence_ids.append(sentence['id'])
        texts.append(sentence.find('text').text)           

    # Create a pandas dataframe from the extracted data
    df = pd.DataFrame({'Sentence ID': sentence_ids,
                       'text': texts,
                       'label': categories,
                       'polarity': polarities})
    print(f'{path.split("/")[-1]} has been parsed. The number of sentences with opinions is '
          f"{len(soup.find_all('Opinions'))}({len(soup.find_all('sentence'))}).")
    
    return df

In [73]:
def parseXML_p2(path):
    with open(path) as xmldata:
        soup = BeautifulSoup(xmldata, "xml")
    # Create empty lists to store the extracted data
    review_rids = []
    categories = []
    polarities = []
    for review in soup.find_all('Review'):
        s_categories = []
        s_polarities = []
        for opinion in review.find_all('Opinion'):
            if opinion is not None:
                s_categories.append(opinion['category'])
                s_polarities.append(opinion['polarity'])
        categories.append(s_categories)
        polarities.append(s_polarities)
        review_rids.append(review['rid'])   
            

    # Create a pandas dataframe from the extracted data
    df = pd.DataFrame({'Review RID': review_rids,
                       'label': categories,
                       'polarity': polarities})
    print(f'{path.split("/")[-1]} has been parsed. The number of reviews with opinions is '
          f"{len(soup.find_all('Review'))}({len(df['label'])}).")
    
    return df

In [15]:
# # Data analysis
# import matplotlib.pyplot as plt

# # Split categories and create new rows
# df_copy = df.copy()
# split_categories = df_copy['Category'].str.split(',')
# df_copy = df_copy.assign(Category=split_categories).explode('Category')

# # Get the unique categories and their frequencies
# category_counts = df_copy['Category'].value_counts()
# #top_categories = category_counts.head(50)

# # Plot the frequencies in descending order
# category_counts.plot(kind='barh')
# plt.gca().invert_yaxis()
# plt.title('Category Frequencies')
# plt.xlabel('Frequency')
# plt.ylabel('Category')
# plt.show()

## 1.2 tokenisation, (stopwords,punc removal), lemmatization

In [16]:
def tokenisation(data):
    tokenised_data = [word_tokenize(sentence) for sentence in data]
    return tokenised_data


def remove_stopwords(data):
    stopwords = stopwords.words('english')
    filtered_words = [[word for word in sentence if word.lower() not in stopwords] for sentence in data]
    return filter_words
#print(stopwords.words('english'))
def remove_punctuation(data):
    text = [[word for word in sentence if re.sub(r'[^\w\s]+', '', word).isalnum()] for sentence in data]
    return text

In [17]:
##lemmatization
from nltk import stem
def lemmatization(data):
    wnl = stem.WordNetLemmatizer()
    lematized = [[wnl.lemmatize(word) for word in sentence] for sentence in tokenised_data]
    return lematized

In [18]:
#data = df['Text'].values.tolist()
#data = tokenisation(data)
#data = remove_stopwords(data)
#data = remove_punctuation(data)
#data = lemmatization(data)
#data = [[word.lower() for word in sentence] for sentence in data]
#print(data)

# 2. Part 1 - Sentence-level Aspect Based Sentiment Analysis

+ Features - unigrams, tfidf, .etc 
+ Model - Logistic Regression Classifier with threshold t

## 2.1 Features for Catagory Extraction

In [19]:
# from nltk.tag import pos_tag
# pos_data = [pos_tag(sentence,tagset='universal') for sentence in data]
# print(pos_data)

In [20]:
import re
import numpy as np
from bs4 import BeautifulSoup
import pandas as pd

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, classification_report

In [21]:
def feature_bow_around_verb(sentences):
    sentence_list = []
    verb_bow_list = []

    for sentence in sentences:
        verb_bow = []
        pos_tags = nltk.pos_tag(sentence)

        verbs = [word for word, pos in pos_tags if pos.startswith('VB')]

        if len(verbs) > 0:
            verb = verbs[0]
            verb_index = sentence.index(verb)

            for i in range(max(0, verb_index - 5), verb_index):
                if pos_tags[i][1].startswith('JJ') or pos_tags[i][1].startswith('RB') or pos_tags[i][1].startswith('NN'):
                    verb_bow.append(sentence[i])

            for i in range(verb_index + 1, min(verb_index + 6, len(sentence))):
                if pos_tags[i][1].startswith('JJ') or pos_tags[i][1].startswith('RB') or pos_tags[i][1].startswith('NN'):
                    verb_bow.append(sentence[i])

        sentence_list.append(sentence)
        verb_bow_list.append(verb_bow)

    df = pd.DataFrame({'Sentence': sentence_list, 'BoW around Verb': verb_bow_list})
    return df

In [22]:
def feature_bow_at_end_of_sentence(sentences):
    sentence_list = []
    end_of_sentence_bow_list = []

    for sentence in sentences:
        end_of_sentence_bow = []
        pos_tags = nltk.pos_tag(sentence)

        for i in range(len(pos_tags)-1, max(len(pos_tags)-6, -1), -1):
            if pos_tags[i][1].startswith('JJ') or pos_tags[i][1].startswith('RB'):
                end_of_sentence_bow.append(sentence[i])

        sentence_list.append(sentence)
        end_of_sentence_bow_list.append(end_of_sentence_bow)

    df = pd.DataFrame({'Sentence': sentence_list, 'BoW at End of Sentence': end_of_sentence_bow_list})
    return df

In [23]:
def custom_tokenizer(text):
    # Tokenize the text using NLTK's word_tokenize function
    tokens = word_tokenize(text)
    # Remove punctuation
    tokens = [word for word in tokens if re.sub(r'[^\w\s]+', '', word).isalnum()]
    # Remove stopwords
    stop_words = stopwords.words('english')
    tokens = [token for token in tokens if token not in stop_words]
    # Convert tokens to lowercase
    tokens = [token.lower() for token in tokens]   
    return tokens

def feature_unigram(df, df_t):    
    # Create a CountVectorizer instance to compute unigram counts
    count_vectorizer = CountVectorizer(tokenizer=custom_tokenizer,max_features=1000)
    # Fit and transform the sentences to obtain the unigram count features
    count_matrix = count_vectorizer.fit_transform(df['text'])
    count_matrix_t = count_vectorizer.transform(df_t['text'])
    # Convert the count matrix to a dataframe with appropriate column names
    count_df = pd.DataFrame(count_matrix.toarray(), columns=count_vectorizer.get_feature_names())
    count_df_t = pd.DataFrame(count_matrix_t.toarray(), columns=count_vectorizer.get_feature_names())
    return count_df, count_df_t

def feature_Tfidf(df, df_t):
    # Create a TfidfVectorizer instance to compute TF-IDF features
    tfidf_vectorizer = TfidfVectorizer(tokenizer=custom_tokenizer)
    # Fit and transform the sentences to obtain the TF-IDF features
    tfidf_matrix = tfidf_vectorizer.fit_transform(df['text'])
    tfidf_matrix_t = tfidf_vectorizer.transform(df_t['text'])
    # Convert the TF-IDF matrix to a dataframe with appropriate column names
    tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names())
    tfidf_df_t = pd.DataFrame(tfidf_matrix_t.toarray(), columns=tfidf_vectorizer.get_feature_names())
    return tfidf_df, tfidf_df_t

## 2.2 Logistic Regression Classifier for Catagory Extraction 

In [24]:
def catagory_extraction_LogisticRegressionClf():
    #data preprocessing
    Laptops_Train_p1 = parseXML("./data/Laptops_Train_p1.xml")
    Laptops_Test_p1_gold = parseXML("./data/Laptops_Test_p1_gold.xml")

    train = pd.DataFrame({'text': Laptops_Train_p1['Text'],
                          'label': Laptops_Train_p1['Categories']})
    test = pd.DataFrame({'text': Laptops_Test_p1_gold['Text'],
                         'label': Laptops_Test_p1_gold['Categories']})

    # Concatenate the original dataframe with the unigram count dataframe and the TF-IDF dataframe
    unigram_train, unigram_test = feature_unigram(train, test)
    tfidf_train, tfidf_test = feature_Tfidf(train, test)
    x_train = pd.concat([unigram_train, tfidf_train], axis=1)
    x_test = pd.concat([unigram_test, tfidf_test], axis=1)
    # x_train = unigram_train
    # x_test = unigram_test

    # Convert the labels into binary arrays
    mlb = MultiLabelBinarizer()
    y = mlb.fit_transform(train['label'])
    y_test = mlb.transform(test['label'])
    # Create a dataframe for the binary label arrays
    y_train = pd.DataFrame(y, columns=mlb.classes_)
    y_test = pd.DataFrame(y_test, columns=mlb.classes_)
    
    # Create an instance of LogisticRegression
    clf = LogisticRegression(max_iter=1000)
    y_pred = np.zeros(y_test.shape)
    # Iterate through each label and train a separate binary classifier
    for i in range(y_train.shape[1]):
        label = mlb.classes_[i]
        print("Training classifier for label:", label)
        # Fit the model on the training data for the current label
        clf.fit(x_train, y_train.iloc[:, i])
        # Predict the probabilities of the current label for the testing data
        y_pred_prob = clf.predict_proba(x_test)[:, 1] # Use probabilities of positive class (1)

        # Set the threshold for category assignment
        threshold = 0.2

        # Generate predicted labels based on threshold
        y_pred_labels = np.where(y_pred_prob >= threshold, 1, 0)

        # Add predicted labels to the corresponding column in the binary label array
        y_pred[:, i] = y_pred_labels.tolist()

#     # Calculate accuracy
#     accuracy = accuracy_score(y_test, y_pred)
#     print("Accuracy:", accuracy)

    # Generate classification report
    report = classification_report(y_test, y_pred, zero_division=1)
    print("Classification Report:\n", report)
    
    test_predicted_labels = mlb.inverse_transform(binary_labels)    
    return test_predicted_labels

In [25]:
#catagory_extraction_LogisticRegressionClf()

## 2.3 Features for Sentiment Analysis

In [53]:
def dataframe_expansion(df):
    original_labels = df['label'].tolist()
    original_polarities = df['polarity'].tolist()
    
    # ==advanced indexing==
    reps = [len(val) for val in df['label']]
    df = df.loc[np.repeat(df.index.values, reps)]
    
    df['label'] = [item for sublist in original_labels for item in sublist]
    df['polarity'] = [item for sublist in original_polarities for item in sublist]
    
    return df

def dataframe_undoexpansion(y_pred, test):
    y_pred = pd.DataFrame({'polarity': y_pred})
    y_pred = inverse_numerical_rep(y_pred['polarity'])
    y_pred = y_pred.tolist()
    y_aligned = []
    for index, row in test.iterrows():
        sublist = []
        for i in range(len(row['polarity'])):
            sublist.append(y_pred.pop(0))
        y_aligned.append(sublist)
    test_pred = test.copy()
    test_pred['polarity'] = y_aligned
    return test_pred

def onehotting(x_train, x_test):
    # Convert 'label' column in x_train and x_test to one-hot encoding
    x_train_onehot = pd.get_dummies(x_train['label'], prefix='class')
    x_test_onehot = pd.get_dummies(x_test['label'], prefix='class')

    # Get the columns that are missing in x_test_onehot
    missing_cols = set(x_train_onehot.columns) - set(x_test_onehot.columns)
    for col in missing_cols:
        x_test_onehot[col] = 0

    # Reorder the columns in x_test_onehot to match the column names in x_train_onehot
    x_test_onehot = x_test_onehot[x_train_onehot.columns]
    
    # Concatenate x_train and x_test with one-hot encoded columns
    x_train = pd.concat([x_train, x_train_onehot], axis=1)
    x_test = pd.concat([x_test, x_test_onehot], axis=1)

    return x_train, x_test

def numerical_rep(df):
    df = df.copy()
    y_dict = {'positive': 1, 'negative': -1, 'neutral': 0}
    y = df.map(y_dict)
    return y

def inverse_numerical_rep(df):
    df = df.copy()
    y_dict = {'positive': 1, 'negative': -1, 'neutral': 0}
    reverse_y_dict = {v: k for k, v in y_dict.items()} # Reverse the keys and values in y_dict
    y = df.map(reverse_y_dict)
    return y

In [58]:
def sentiment_analysis_feature_selection(train, test, features='unigram'):
    """
    parameter:
        train,test: df
        dataset being trained and tested on
        features: String
        which consists of customised features "unigram,Tfidf,..." splitted by "," 
    
    returns:
        x_train, x_test, y_train, y_test = DataFrame
        only includes the data needed for training and test.
    """
    # Customised
    # Concatenate the original dataframe with the features selected
    # Ignore upper/lower case, leading or trailing whitespaces
    features_set = {'tfidf','unigram'}
    customised_features_set = set(feature.strip().lower() for feature in features.split(","))
    if not customised_features_set.issubset(features_set):
        raise ValueError("Please input with right features.")

    features_list = []
    # Appending tuples at first
    if 'tfidf' in customised_features_set:
        features_list.append(feature_unigram(train, test))
    if 'unigram' in customised_features_set:
        features_list.append(feature_Tfidf(train, test))
    # Unpacking
    x_train_features = []
    x_test_features = []
    for x_train_feature, x_test_feature in features_list:
        x_train_features.append(x_train_feature)
        x_test_features.append(x_test_feature)
    # convert to DataFrame
    x_train = pd.concat(x_train_features, axis=1)
    x_test = pd.concat(x_test_features, axis=1)
    
    # Default
    # numerical representation of polarity
    xy_train = dataframe_expansion(pd.concat([train, x_train], axis=1))
    xy_test = dataframe_expansion(pd.concat([test, x_test], axis=1))
    y_train = numerical_rep(xy_train['polarity'])
    y_test = numerical_rep(xy_test['polarity'])
    # label one-hotting
    x_train, x_test = onehotting(xy_train, xy_test)
    # Drop the original 'label' column from x_train and x_test
    x_train.drop(train.columns.columns.values.tolist(), axis=1, inplace=True)
    x_test.drop(test.columns.values.tolist(), axis=1, inplace=True)
    
    return x_train, x_test, y_train, y_test

## 2.4 Logistic Regression Classifer for Sentiment Analysis

In [64]:
def sentiment_anaylysis_LogisticRegressionClf(features='unigram'):
    """
        Train the classifer using selected data, make predictions on training data and 
        Generate classification report for training data
        
        parameter:        
            features: String
            which consists of customised features "unigram,Tfidf,..." splitted by "," 
            and will be used by the training of classifier
    """
    Laptops_Train_p1 = parseXML("./data/Laptops_Train_p1.xml")
    Laptops_Test_p1_gold = parseXML("./data/Laptops_Test_p1_gold.xml")

    # Feature selection
    x_train, x_test, y_train, y_test = sentiment_analysis_feature_selection(Laptops_Train_p1, Laptops_Test_p1_gold, features=features)
        
    # Create an instance of LogisticRegression
    clf = LogisticRegression(max_iter=1000)
    clf.fit(x_train, y_train)

    # Make predictions on training data
    y_pred = clf.predict(x_test)

    # Generate classification report for training data
    classification_report_train = classification_report(y_test, y_pred)
    print("Classification Report (Training Data):\n", classification_report_train)
    
    # Data format alignment
    test_aligned = dataframe_undoexpansion(y_pred, Laptops_Test_p1_gold)
    
    return test_aligned

In [65]:
test_aligned = sentiment_anaylysis_LogisticRegressionClf(features='unigram')

Laptops_Train_p1.xml has been parsed. The number of sentences with opinions is 2039(2500).
Laptops_Test_p1_gold.xml has been parsed. The number of sentences with opinions is 808(808).
Classification Report (Training Data):
               precision    recall  f1-score   support

          -1       0.66      0.68      0.67       274
           0       0.33      0.07      0.11        46
           1       0.79      0.84      0.81       481

    accuracy                           0.74       801
   macro avg       0.60      0.53      0.53       801
weighted avg       0.72      0.74      0.72       801



# 3. Part 2 - Text-level Aspect Based Sentiment Analysis

In [92]:
from collections import Counter

In [74]:
Laptops_Train_p2 = parseXML_p2("./data/Laptops_Train_p2.xml")
Laptops_Test_p2_gold = parseXML_p2("./data/Laptops_Test_p2_gold.xml")

Laptops_Train_p2.xml has been parsed. The number of reviews with opinions is 395(395).
Laptops_Test_p2_gold.xml has been parsed. The number of reviews with opinions is 80(80).


In [75]:
test_aligned

Unnamed: 0,Sentence ID,text,label,polarity
0,B0074703CM_108_ANONYMOUS:0,"Well, my first apple computer and I am impressed.",[LAPTOP#GENERAL],[positive]
1,B0074703CM_108_ANONYMOUS:1,"Works well, fast and no reboots.",[LAPTOP#OPERATION_PERFORMANCE],[positive]
2,B0074703CM_108_ANONYMOUS:2,Waiting to install MS Office and see how it go...,[],[]
3,B0074703CM_108_ANONYMOUS:3,"Have always been a PC guy, but decided to try ...",[],[]
4,B0074703CM_108_ANONYMOUS:4,Glad I did so far.,"[COMPANY#GENERAL, LAPTOP#GENERAL]","[positive, positive]"
...,...,...,...,...
803,B00L156USY_7_ANONYMOUS:2,When I attach a mouse it's fine but the touchp...,[MOUSE#USABILITY],[negative]
804,B00L156USY_7_ANONYMOUS:3,I went through the settings and there isn't a ...,[MOUSE#USABILITY],[negative]
805,B00L156USY_7_ANONYMOUS:4,Thinking I received the laptop with a faulty t...,[],[]
806,B00L156USY_7_ANONYMOUS:5,"If anyone can help me out on this, that would ...",[],[]


In [86]:
def group_to_text_level(df):
    
    df = df.copy()
    
    # Split the 'Sentence ID' column by ':' and only keep the first part
    df['Sentence ID'] = df['Sentence ID'].apply(lambda x: x.split(':')[0])

    # Group the rows by 'Sentence ID' and combine the 'label' and 'polarity' values into a list
    grouped_df = df.groupby('Sentence ID',sort=False).agg({'label': lambda x: sum(x, []), 'polarity': lambda x: sum(x, [])})

    # Reset the index to turn 'Sentence ID' back into a regular column
    grouped_df = grouped_df.reset_index()

    grouped_df = grouped_df.rename(columns={'Sentence ID': 'Review RID'})
    
    return grouped_df

# def get_most_frequent_polarity_dict(df):
    
    
def get_polarities_frequency_dict(rp):
    frequency_distribution = Counter(row['polarity'])
    return frequency_distribution

In [87]:
Laptops_Test_p2_gold

Unnamed: 0,Review RID,label,polarity
0,B0074703CM_108_ANONYMOUS,"[LAPTOP#OPERATION_PERFORMANCE, COMPANY#GENERAL...","[positive, positive, positive]"
1,B00GJUQ4Z0_10_ANONYMOUS,"[LAPTOP#OPERATION_PERFORMANCE, LAPTOP#USABILIT...","[negative, negative, negative, negative]"
2,B0146DD02G_18_ANONYMOUS,"[LAPTOP#MISCELLANEOUS, LAPTOP#OPERATION_PERFOR...","[positive, positive, positive, positive, posit..."
3,B0074703CM_268_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#DESIGN_FEATURES, HARD_DI...","[positive, positive, positive, positive, posit..."
4,B0074703CM_240_ANONYMOUS,"[LAPTOP#QUALITY, LAPTOP#PRICE, DISPLAY#QUALITY...","[positive, positive, positive, positive, posit..."
...,...,...,...
75,B0074703CM_282_ANONYMOUS,"[LAPTOP#MISCELLANEOUS, SOFTWARE#OPERATION_PERF...","[negative, negative, negative, negative, negat..."
76,B00LNFCADQ_5_ALWWKZCL1CKGD,"[LAPTOP#CONNECTIVITY, SUPPORT#QUALITY, KEYBOAR...","[negative, negative, negative, negative, negat..."
77,B0074703CM_6_ANONYMOUS,"[LAPTOP#PRICE, OS#GENERAL, COMPANY#GENERAL, OS...","[positive, positive, positive, positive, negat..."
78,B0074703CM_173_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#OPERATION_PERFORMANCE, L...","[negative, positive, positive, positive, posit..."


In [88]:
def text_level_ABSA(test_pred):
    
    grouped_sentence = group_to_text_level(test_pred)
    
    


In [89]:
grouped_df

Unnamed: 0,Sentence ID,label,polarity
0,B0074703CM_108_ANONYMOUS,"[LAPTOP#GENERAL, LAPTOP#OPERATION_PERFORMANCE,...","[positive, positive, positive, positive]"
1,B00GJUQ4Z0_10_ANONYMOUS,"[LAPTOP#OPERATION_PERFORMANCE, LAPTOP#OPERATIO...","[negative, negative, positive, negative, posit..."
2,B0146DD02G_18_ANONYMOUS,"[LAPTOP#GENERAL, LAPTOP#MISCELLANEOUS, LAPTOP#...","[positive, positive, positive, positive, posit..."
3,B0074703CM_268_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#GENERAL, LAPTOP#PRICE, L...","[positive, positive, neutral, negative, positi..."
4,B0074703CM_240_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#GENERAL, DISPLAY#QUALITY...","[positive, positive, positive, positive]"
...,...,...,...
75,B0074703CM_282_ANONYMOUS,"[LAPTOP#MISCELLANEOUS, SOFTWARE#OPERATION_PERF...","[negative, negative, negative, negative, negat..."
76,B00LNFCADQ_5_ALWWKZCL1CKGD,"[LAPTOP#GENERAL, LAPTOP#GENERAL, LAPTOP#CONNEC...","[negative, negative, negative, negative, negat..."
77,B0074703CM_6_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#GENERAL, LAPTOP#GENERAL,...","[positive, positive, negative, positive, posit..."
78,B0074703CM_173_ANONYMOUS,"[LAPTOP#GENERAL, LAPTOP#PRICE, LAPTOP#OPERATIO...","[negative, positive, positive, negative, negat..."


In [90]:
def get_most_frequent_polarity_dist(row):
    dict1 = {}
    for key, value in zip(row['label'], row['polarity']):
        if key in dict1:
            dict1[key].append(value)
        else:
            dict1[key] = [value]
    most_frequent_values = {}
    for key, value_list in dict1.items():
        most_frequent_values[key] = max(set(value_list), key=value_list.count)

    return most_frequent_values

df = group_to_text_level(test_aligned)
df['most_frequent_polarity_dist'] = df.apply(get_most_frequent_polarity_dist, axis=1, result_type='expand')
df['polarity_distribution'] = df.apply(get_polarities_frequency_dict, axis=1, result_type='expand')


In [91]:
df

Unnamed: 0,Review RID,label,polarity,most_frequent_polarity_dist,polarity_distribution
0,B0074703CM_108_ANONYMOUS,"[LAPTOP#GENERAL, LAPTOP#OPERATION_PERFORMANCE,...","[positive, positive, positive, positive]","{'LAPTOP#GENERAL': 'positive', 'LAPTOP#OPERATI...",{'positive': 4}
1,B00GJUQ4Z0_10_ANONYMOUS,"[LAPTOP#OPERATION_PERFORMANCE, LAPTOP#OPERATIO...","[negative, negative, positive, negative, posit...","{'LAPTOP#OPERATION_PERFORMANCE': 'negative', '...","{'negative': 3, 'positive': 2}"
2,B0146DD02G_18_ANONYMOUS,"[LAPTOP#GENERAL, LAPTOP#MISCELLANEOUS, LAPTOP#...","[positive, positive, positive, positive, posit...","{'LAPTOP#GENERAL': 'positive', 'LAPTOP#MISCELL...",{'positive': 8}
3,B0074703CM_268_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#GENERAL, LAPTOP#PRICE, L...","[positive, positive, neutral, negative, positi...","{'LAPTOP#PRICE': 'positive', 'LAPTOP#GENERAL':...","{'positive': 5, 'neutral': 1, 'negative': 1}"
4,B0074703CM_240_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#GENERAL, DISPLAY#QUALITY...","[positive, positive, positive, positive]","{'LAPTOP#PRICE': 'positive', 'LAPTOP#GENERAL':...",{'positive': 4}
...,...,...,...,...,...
75,B0074703CM_282_ANONYMOUS,"[LAPTOP#MISCELLANEOUS, SOFTWARE#OPERATION_PERF...","[negative, negative, negative, negative, negat...","{'LAPTOP#MISCELLANEOUS': 'negative', 'SOFTWARE...","{'negative': 11, 'positive': 2}"
76,B00LNFCADQ_5_ALWWKZCL1CKGD,"[LAPTOP#GENERAL, LAPTOP#GENERAL, LAPTOP#CONNEC...","[negative, negative, negative, negative, negat...","{'LAPTOP#GENERAL': 'negative', 'LAPTOP#CONNECT...",{'negative': 14}
77,B0074703CM_6_ANONYMOUS,"[LAPTOP#PRICE, LAPTOP#GENERAL, LAPTOP#GENERAL,...","[positive, positive, negative, positive, posit...","{'LAPTOP#PRICE': 'positive', 'LAPTOP#GENERAL':...","{'positive': 10, 'negative': 1}"
78,B0074703CM_173_ANONYMOUS,"[LAPTOP#GENERAL, LAPTOP#PRICE, LAPTOP#OPERATIO...","[negative, positive, positive, negative, negat...","{'LAPTOP#GENERAL': 'negative', 'LAPTOP#PRICE':...","{'negative': 5, 'positive': 3}"
