In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [23]:
import os
from textblob import TextBlob

def extract_rating_from_filename(filename):
    """Return the integer rating encoded at the end of a review file name.

    The file name is expected to look like ``<id>_<rating>.<ext>``
    (for example ``"0_3.txt"`` -> ``3``). The extension is dropped and the
    text after the LAST underscore is parsed as the rating, so stems that
    contain additional underscores are still handled.

    Args:
        filename: File name (not necessarily a full path) of a review file.

    Returns:
        The rating as an ``int``.

    Raises:
        ValueError: If the stem contains no underscore, or the part after
            the last underscore is not a valid integer.
    """
    # Drop the file extension, keeping only the "<id>_<rating>" stem.
    base_name = os.path.splitext(filename)[0]

    # rpartition splits on the last '_' only; a plain split('_') unpacked into
    # two names fails as soon as the stem contains a second underscore.
    _, separator, rating_str = base_name.rpartition('_')
    if not separator:
        raise ValueError(
            f"Expected a file name of the form '<id>_<rating>', got {filename!r}"
        )
    return int(rating_str)

# Directories that are scanned for review files, in this order.
path = ['aclImdb/train/neg', 'aclImdb/train/pos']


def _read_review_text(file_path):
    """Read a review file and return its stripped text.

    The file is decoded as UTF-8 first. If that fails, it is re-read as
    latin-1, which maps every possible byte to a character and therefore
    cannot raise ``UnicodeDecodeError`` -- no further fallback is needed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read().strip()
    except UnicodeDecodeError:
        with open(file_path, 'r', encoding='latin-1') as file:
            return file.read().strip()


# One dict per review; converted to a DataFrame in a single step afterwards.
data = []
for directory_path in path:
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the resulting DataFrame reproducible across platforms.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.txt'):
            continue

        file_path = os.path.join(directory_path, filename)

        # Skip entries that are not regular files (e.g. a directory whose
        # name happens to end in ".txt").
        if not os.path.isfile(file_path):
            print(f"File '{file_path}' does not exist.")
            continue

        # The rating is taken from the file name, not from the file content.
        rating = extract_rating_from_filename(filename)
        review_text = _read_review_text(file_path)

        # Sentiment polarity of the review text as computed by TextBlob.
        blob = TextBlob(review_text)
        sentiment = blob.sentiment.polarity

        data.append({'review': review_text, 'sentiment': sentiment, 'rating': rating})

# Build the DataFrame with an explicit column order.
df = pd.DataFrame(data, columns=['review', 'sentiment', 'rating'])

print(df)

                                                  review  sentiment  rating
0      Story of a man who has unnatural feelings for ...  -0.071759       3
1      Airport '77 starts as a brand new luxury 747 p...   0.040492       4
2      This film lacked something I couldn't put my f...   0.079167       4
3      Sorry everyone,,, I know this is supposed to b...   0.043542       1
4      When I was little my parents took me along to ...  -0.055741       1
...                                                  ...        ...     ...
24995  Seeing as the vote average was pretty low, and...   0.297708       9
24996  The plot had some wretched, unbelievable twist...   0.250000       8
24997  I am amazed at how this movie(and most others ...   0.098681      10
24998  A Christmas Together actually came before my t...   0.118069       8
24999  Working-class romantic drama from director Mar...   0.128530       7

[25000 rows x 3 columns]
