In [72]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

In [81]:
# Load the labelled examples: the 'activity' column is the input text,
# the 'category' column is the target label.
data = pd.read_csv('data.csv')
X, y = data['activity'], data['category']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=36
)

# Bag-of-words counts. The vocabulary is fitted on the training texts only and
# then reused unchanged for the test texts.
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Densify the sparse count matrices and convert them to float32 tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(counts.toarray(), dtype=torch.float32)
    for counts in (X_train_counts, X_test_counts)
)

# Map every distinct category to an integer class index (order of first appearance in y).
label_to_idx = {category: position for position, category in enumerate(y.unique())}

# Encode the string labels of both splits as int64 class-index tensors.
y_train_idx, y_test_idx = (
    torch.tensor([label_to_idx[category] for category in split_labels], dtype=torch.long)
    for split_labels in (y_train, y_test)
)

class NeuralNetwork(nn.Module):
    """Fully connected classifier with four ReLU-activated hidden layers.

    Layers are exposed as ``fc1``..``fc5`` (linear) and ``relu1``..``relu4``
    (activations). ``fc5`` is the output layer and returns raw, un-normalised
    class scores (no softmax is applied here).

    Args:
        input_size: Number of input features per sample.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        hidden_size3: Width of the third hidden layer.
        hidden_size4: Width of the fourth hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, hidden_size4, num_classes):
        super().__init__()
        widths = (input_size, hidden_size1, hidden_size2, hidden_size3, hidden_size4)
        # Register fc1/relu1 ... fc4/relu4 in order; the attribute names determine
        # the state_dict keys, so they must stay exactly as they are.
        for depth, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{depth}", nn.Linear(fan_in, fan_out))
            setattr(self, f"relu{depth}", nn.ReLU())
        self.fc5 = nn.Linear(hidden_size4, num_classes)

    def forward(self, x):
        """Return class scores for ``x`` after passing it through all five linear layers."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        hidden = self.relu3(self.fc3(hidden))
        hidden = self.relu4(self.fc4(hidden))
        return self.fc5(hidden)

# Hyperparameters
input_size = X_train_tensor.shape[1]  # one input feature per column of the count matrix
hidden_size1 = 1800
hidden_size2 = 1200
hidden_size3 = 900
hidden_size4 = 300
num_classes = len(label_to_idx)  # one output score per distinct category
learning_rate = 0.001
num_epochs = 1000

# Seed PyTorch's RNG before the model is built so the random weight initialisation
# (and therefore the trained model and its accuracy) is reproducible on a fresh
# kernel. The value mirrors the random_state used for the train/test split.
torch.manual_seed(36)

model = NeuralNetwork(input_size, hidden_size1, hidden_size2, hidden_size3, hidden_size4, num_classes)

# CrossEntropyLoss takes raw scores plus integer class indices; Adam updates all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [37]:
# Training: every epoch is one full-batch step over the entire training tensor
for epoch in range(num_epochs):
    # Drop the gradients left over from the previous step before computing new ones
    optimizer.zero_grad()

    # Forward pass and loss against the integer class labels
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_idx)

    # Backpropagate, then apply one optimizer update
    loss.backward()
    optimizer.step()

In [68]:
# Evaluation on the held-out test split
# Switch to inference mode explicitly. This network has no dropout or batch-norm
# layers, so the scores are unaffected, but evaluation no longer depends on
# whatever mode an earlier cell left the model in.
model.eval()
with torch.no_grad():  # no gradients are needed for scoring
    outputs = model(X_test_tensor)
    # Index of the highest score in each row. argmax is used instead of unpacking
    # torch.max into `_`, which would shadow IPython's last-output variable.
    predicted = outputs.argmax(dim=1)
    accuracy = (predicted == y_test_idx).sum().item() / len(y_test_idx)
    print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.7694


In [36]:
# Save the model: only the parameter tensors (state_dict) are written, not the module object
torch.save(obj=model.state_dict(), f='trained_model.pth')

In [67]:
# Load the saved weights back into the model
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while it is being loaded.
model.load_state_dict(torch.load('trained_model.pth', weights_only=True))

<All keys matched successfully>

In [80]:
# Read one activity description from the user (input() already returns a str)
your_activity = [input()]

# Reuse the vectorizer that was fitted on the training texts. Re-creating it from
# its own vocabulary_ and rebinding the global `vectorizer` is unnecessary and
# makes the cell depend on its own previous execution.
your_activity_counts = vectorizer.transform(your_activity)
your_activity_tensor = torch.tensor(your_activity_counts.toarray(), dtype=torch.float32)

# Inverse of label_to_idx: class index -> category name
idx_to_label = {idx: label for label, idx in label_to_idx.items()}

model.eval()  # inference mode; no-op for this architecture but independent of earlier cells
with torch.no_grad():
    prediction = model(your_activity_tensor)
    # One input row, so argmax over the class dimension yields a single index
    predicted_category_idx = torch.argmax(prediction, dim=1).item()
predicted_category = idx_to_label[predicted_category_idx]

print("прогноз ии:", predicted_category)

прогноз ии: Everyday_Activities
