In [1]:
import pandas as pd
from collections import defaultdict

In [2]:
# Read the predicted aspects as tab-separated text with no header row;
# the column names are supplied explicitly.
file_path = 'pred_aspects_with_sent.txt'
column_names = ['text_id', 'category', 'aspect', 'start', 'end', 'sentiment']
df = pd.read_csv(file_path, sep='\t', header=None, names=column_names)
df.head(5)

Unnamed: 0,text_id,category,aspect,start,end,sentiment
0,13823,Whole,аппетит,8,15,neutral
1,13823,Service,встретил,138,146,neutral
2,13823,Service,менеджер,147,155,neutral
3,13823,Service,девушка,179,186,neutral
4,13823,Service,проводила к столу,188,205,neutral


In [3]:
# Categories for which every text gets an entry in the results.
all_categories = [
    'Whole',
    'Service',
    'Interior',
    'Food',
    'Price',
]

In [4]:
# Dictionary for the results, keyed by text id. A text id accessed for the
# first time gets an empty list for every category in all_categories.
def _new_category_lists():
    """Return a fresh mapping of each category to an empty list."""
    return {category: [] for category in all_categories}


result_dict = defaultdict(_new_category_lists)

In [5]:
# Append each aspect's sentiment to the list for its (text id, category) pair.
# The three needed columns are walked in lockstep instead of using
# df.iterrows(), which constructs a full Series object for every row.
for text_id, category, sentiment in zip(df['text_id'], df['category'], df['sentiment']):
    result_dict[text_id][category].append(sentiment)

In [6]:
# Collapse a list of sentiments into a single sentiment value.
def determine_overall_sentiment(sentiments):
    """Summarise a collection of sentiment labels as one overall label.

    Args:
        sentiments: iterable of sentiment labels (e.g. 'positive',
            'negative', 'neutral').

    Returns:
        'both'     if 'positive' and 'negative' both occur,
        'positive' if 'positive' occurs and 'negative' does not,
        'negative' if 'negative' occurs and 'positive' does not,
        'absence'  if neither occurs and ``sentiments`` is empty,
        'neutral'  otherwise.
    """
    tally = {}
    for label in sentiments:
        tally[label] = tally.get(label, 0) + 1

    has_positive = tally.get('positive', 0) > 0
    has_negative = tally.get('negative', 0) > 0

    if has_positive and has_negative:
        return 'both'
    if has_positive:
        return 'positive'
    if has_negative:
        return 'negative'

    # No polar label at all: an empty input means the category was never
    # mentioned, anything else counts as neutral.
    return 'neutral' if sentiments else 'absence'

In [7]:
# Replace each category's list of sentiments with one overall label.
# Only values that are still lists are aggregated: once this cell has run, the
# values are label strings, and passing a string to determine_overall_sentiment
# would iterate its characters and turn every label into 'neutral'. The guard
# makes re-running the cell harmless.
for text_id, categories in result_dict.items():
    for category, sentiments in categories.items():
        if isinstance(sentiments, list):
            # Assigning to an existing key does not resize the dict, so it is
            # safe while iterating over its items.
            categories[category] = determine_overall_sentiment(sentiments)

In [8]:
# Flatten the results dictionary into [text_id, category, sentiment] rows
# that a new dataframe can be built from.
result_list = [
    [text_id, category, sentiment]
    for text_id, categories in result_dict.items()
    for category, sentiment in categories.items()
]

In [9]:
# Build the results table; the text id is stored in a column named 'review_id'.
result_columns = ['review_id', 'category', 'sentiment']
result_df = pd.DataFrame(result_list, columns=result_columns)

In [10]:
# Preview the aggregated results (at most the first 15 rows).
result_df.head(n=15)

Unnamed: 0,review_id,category,sentiment
0,13823,Whole,positive
1,13823,Service,positive
2,13823,Interior,absence
3,13823,Food,positive
4,13823,Price,positive
5,1427,Whole,positive
6,1427,Service,both
7,1427,Interior,both
8,1427,Food,positive
9,1427,Price,positive


In [11]:
# Save the results as a tab-separated file, without the dataframe index.
output_file_path = 'pred_full_results.txt'
result_df.to_csv(
    output_file_path,
    sep='\t',
    index=False,
)

print(f"Results saved to {output_file_path}")


Results saved to pred_full_results.txt
