# Соревнование по DL

# Первое решение CNN + LSTM

Идея заключается в том, чтобы рассматривать каждый sequence_id как временной ряд показаний акселерометра и гироскопа. Для этого последовательности приводятся к фиксированной длине и подаются в гибридную модель CNN+LSTM: свёрточная часть извлекает локальные паттерны движений, а LSTM моделирует их временную динамику. Полученные признаки объединяются и используются для классификации одной из 6 активностей.

**accuracy в kaggle: 0.846**

In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

Загрузка и группировка

In [10]:
# Read the competition tables from the working directory.
train_df, test_df = pd.read_csv('train.csv'), pd.read_csv('test.csv')

# Map the string gesture labels to integer class indices; the fitted encoder
# is kept so predictions can be decoded back to label strings later.
le = LabelEncoder()
le.fit(train_df['gesture'])
train_df['gesture_encoded'] = le.transform(train_df['gesture'])
num_classes = len(le.classes_)

In [11]:
def group_sequences(df, include_labels=True):
    """Split a long-format sensor table into one float32 array per sequence.

    Args:
        df: DataFrame with a ``sequence_id`` column and the seven sensor
            columns ``acc_x, acc_y, acc_z, rot_w, rot_x, rot_y, rot_z``.
            When ``include_labels`` is true it must also contain
            ``gesture_encoded``.
        include_labels: if true, each dict value is ``(X, y)`` where ``y`` is
            the ``gesture_encoded`` value of the first row of the sequence;
            otherwise the value is just ``X``.

    Returns:
        dict mapping ``sequence_id`` to ``X`` (shape ``(n_rows, 7)``,
        dtype float32) or to ``(X, y)``.
    """
    sensor_columns = ['acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z']
    grouped = {}
    for sequence_id, rows in df.groupby('sequence_id'):
        features = rows[sensor_columns].to_numpy().astype(np.float32)
        if not include_labels:
            grouped[sequence_id] = features
            continue
        # The label is read from the first row of the sequence only.
        grouped[sequence_id] = (features, rows['gesture_encoded'].iloc[0])
    return grouped

In [12]:
# Per-sequence arrays: train values are (X, y) pairs, test values are X only.
train_seq, test_seq = (
    group_sequences(train_df, include_labels=True),
    group_sequences(test_df, include_labels=False),
)

Паддинг до фикс. длины

In [13]:
# Fixed number of time steps every sequence is truncated or zero-padded to.
MAX_LEN = 128

In [14]:
def pad_to_max_len(X, max_len=MAX_LEN):
    """Bring a 2-D sequence to exactly ``max_len`` rows.

    Sequences longer than ``max_len`` are truncated from the end; shorter (or
    equal-length) ones are extended at the end with float32 zero rows.

    Args:
        X: array of shape ``(n_steps, n_features)``.
        max_len: target number of rows.

    Returns:
        Array of shape ``(max_len, n_features)``.
    """
    n_steps = len(X)
    if n_steps > max_len:
        return X[:max_len]
    filler = np.zeros((max_len - n_steps, X.shape[1]), dtype=np.float32)
    return np.concatenate([X, filler], axis=0)

In [15]:
# Pad every training sequence; the three lists stay aligned because they are
# all derived from the same id order (dict insertion order of train_seq).
train_ids = list(train_seq)
X_train = [pad_to_max_len(train_seq[sid][0]) for sid in train_ids]
y_train = [train_seq[sid][1] for sid in train_ids]

# Same for the unlabeled test sequences.
test_ids = list(test_seq)
X_test = [pad_to_max_len(test_seq[sid]) for sid in test_ids]

X_train

[array([[ 4.00089994e-02,  1.44200004e-03, -1.04955003e-01,
          2.99376369e-01, -6.37282014e-01, -1.86307877e-01,
         -6.85226083e-01],
        [-2.17600004e-03,  5.93676984e-01, -4.88100015e-03,
          5.23463845e-01, -5.14698505e-01, -3.14939678e-01,
         -6.01567984e-01],
        [-2.52597004e-01, -1.09816000e-01,  3.22773010e-01,
          6.56401336e-01, -4.23068762e-01, -3.17673534e-01,
         -5.37804425e-01],
        [ 6.91000023e-04, -3.00339997e-01,  3.58442008e-01,
          6.69966459e-01, -4.05834615e-01, -3.47042918e-01,
         -5.15756190e-01],
        [ 1.31265998e-01, -3.54656994e-01,  6.23360015e-02,
          5.82976878e-01, -4.72840399e-01, -3.58765006e-01,
         -5.54840147e-01],
        [-9.52960029e-02,  9.52270031e-02, -2.57416010e-01,
          3.58763963e-01, -6.13711059e-01, -2.05058619e-01,
         -6.72754169e-01],
        [ 1.93155006e-01,  6.38742030e-01,  6.04143977e-01,
          2.34920934e-01, -6.62995815e-01, -1.15045682e-01

In [16]:
# Stack the per-sequence arrays along a new leading axis and hand them to torch.
# np.stack allocates a fresh array, so sharing its memory with the tensor is safe.
X_train = torch.from_numpy(np.stack(X_train))
y_train = torch.as_tensor(y_train, dtype=torch.long)
X_test = torch.from_numpy(np.stack(X_test))

Модель на pytorch

In [17]:
class CNN_LSTM(nn.Module):
    """Two-branch sequence classifier: a 1-D CNN and an LSTM over the same input.

    The CNN branch (two convolutions followed by global max pooling over time)
    yields a 128-dim vector; the LSTM branch contributes its final hidden state
    (64-dim). Both are concatenated and passed to a small MLP head.

    Args:
        input_dim: number of features per time step.
        seq_len: accepted for interface compatibility; not used by the layers.
        num_classes: number of output logits.
    """

    def __init__(self, input_dim=7, seq_len=128, num_classes=6):
        super().__init__()
        # Convolutional branch; paddings keep the temporal length unchanged.
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=64, kernel_size=5, padding=2)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)

        # Recurrent branch operating on the raw (batch, time, feature) input.
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=64, batch_first=True)

        # Head over the concatenated CNN (128) + LSTM (64) features.
        self.classifier = nn.Sequential(
            nn.Linear(in_features=128 + 64, out_features=128),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=128, out_features=num_classes),
        )

    def _conv_branch(self, x):
        """Return globally max-pooled convolutional features, shape (batch, 128)."""
        # Conv1d expects (batch, channels, time).
        channels_first = x.transpose(1, 2)
        hidden = torch.relu(self.conv1(channels_first))
        hidden = torch.relu(self.conv2(hidden))
        return self.pool(hidden).squeeze(-1)

    def forward(self, x):
        """Compute class logits for ``x`` of shape (batch, time, input_dim)."""
        conv_features = self._conv_branch(x)

        # Final hidden state of the single-layer, unidirectional LSTM.
        _, (final_hidden, _) = self.lstm(x)
        lstm_features = final_hidden[-1]

        fused = torch.cat((conv_features, lstm_features), dim=1)
        return self.classifier(fused)

Обучение

In [18]:
# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

model = CNN_LSTM(num_classes=num_classes)
model = model.to(device)

# Multi-class cross-entropy on raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [19]:
# Pair each padded sequence with its label and serve shuffled mini-batches of 32.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)

In [20]:
# Put the model in training mode (activates the Dropout layer of the classifier head).
model.train()

# Train for 25 epochs over the full training loader; no validation split is used here.
for epoch in range(25):
    total_loss = 0  # sum of per-batch losses within the current epoch
    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean batch loss of the epoch.
    print(f"Loss: {total_loss/len(train_loader):.4f}")

Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 25.41it/s]


Loss: 1.7581


Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 28.67it/s]


Loss: 1.6899


Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 37.14it/s]


Loss: 1.6417


Epoch 4: 100%|██████████| 8/8 [00:00<00:00, 36.55it/s]


Loss: 1.5439


Epoch 5: 100%|██████████| 8/8 [00:00<00:00, 36.40it/s]


Loss: 1.4250


Epoch 6: 100%|██████████| 8/8 [00:00<00:00, 33.61it/s]


Loss: 1.2962


Epoch 7: 100%|██████████| 8/8 [00:00<00:00, 36.12it/s]


Loss: 1.1259


Epoch 8: 100%|██████████| 8/8 [00:00<00:00, 35.84it/s]


Loss: 1.0379


Epoch 9: 100%|██████████| 8/8 [00:00<00:00, 36.96it/s]


Loss: 0.9950


Epoch 10: 100%|██████████| 8/8 [00:00<00:00, 34.70it/s]


Loss: 0.8837


Epoch 11: 100%|██████████| 8/8 [00:00<00:00, 37.04it/s]


Loss: 0.8550


Epoch 12: 100%|██████████| 8/8 [00:00<00:00, 38.75it/s]


Loss: 0.8263


Epoch 13: 100%|██████████| 8/8 [00:00<00:00, 37.63it/s]


Loss: 0.7577


Epoch 14: 100%|██████████| 8/8 [00:00<00:00, 37.01it/s]


Loss: 0.6949


Epoch 15: 100%|██████████| 8/8 [00:00<00:00, 36.78it/s]


Loss: 0.6543


Epoch 16: 100%|██████████| 8/8 [00:00<00:00, 38.12it/s]


Loss: 0.5910


Epoch 17: 100%|██████████| 8/8 [00:00<00:00, 38.02it/s]


Loss: 0.5416


Epoch 18: 100%|██████████| 8/8 [00:00<00:00, 38.33it/s]


Loss: 0.5111


Epoch 19: 100%|██████████| 8/8 [00:00<00:00, 37.78it/s]


Loss: 0.4757


Epoch 20: 100%|██████████| 8/8 [00:00<00:00, 36.40it/s]


Loss: 0.4212


Epoch 21: 100%|██████████| 8/8 [00:00<00:00, 38.01it/s]


Loss: 0.4105


Epoch 22: 100%|██████████| 8/8 [00:00<00:00, 35.66it/s]


Loss: 0.3990


Epoch 23: 100%|██████████| 8/8 [00:00<00:00, 36.34it/s]


Loss: 0.3615


Epoch 24: 100%|██████████| 8/8 [00:00<00:00, 34.15it/s]


Loss: 0.3860


Epoch 25: 100%|██████████| 8/8 [00:00<00:00, 36.26it/s]

Loss: 0.3205





Предсказание и сабмит

In [21]:
# Evaluation mode turns the Dropout layer into a no-op for inference.
model.eval()

# Unshuffled loader so predictions come out in the same order as X_test / test_ids.
test_dataset = torch.utils.data.TensorDataset(X_test)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [22]:
# Collect the arg-max class index for every test sequence, batch by batch.
all_preds = []
with torch.no_grad():  # inference only, no autograd graph needed
    for batch in test_loader:
        # TensorDataset with a single tensor yields 1-element batches.
        features = batch[0].to(device)
        logits = model(features)
        batch_classes = logits.argmax(dim=1).cpu().numpy()
        all_preds.extend(batch_classes)

In [23]:
# sequence_id -> predicted class index (test_ids and all_preds share one order).
seqid_to_pred = {seq_id: pred for seq_id, pred in zip(test_ids, all_preds)}

# Fallback for ids missing from the predictions: the most frequent training class.
class_counts = np.bincount(y_train.numpy())
default_class = class_counts.argmax()

In [24]:
# Fill the sample submission with decoded gesture labels and write it out.
sample_sub = pd.read_csv('sample_submission.csv')

# Look up the predicted class index of every requested sequence in one pass;
# ids without a prediction become NaN and fall back to the majority class.
encoded_preds = (
    sample_sub['sequence_id']
    .map(seqid_to_pred)
    .fillna(default_class)
    .astype('int64')
)

# Decode all indices with a single call instead of one inverse_transform per row.
sample_sub['gesture'] = le.inverse_transform(encoded_preds.to_numpy())
sample_sub.to_csv('submission_torch.csv', index=False)

# Второе решение: CNN + BiLSTM с Attention и CatBoost (Stacking)

Каждая последовательность сенсорных данных приводится к фиксированной длине и обрабатывается нейросетевой моделью CNN + двунаправленный LSTM с механизмом attention, где CNN извлекает локальные паттерны движения, а attention позволяет модели фокусироваться на наиболее информативных временных участках последовательности. Для повышения качества используются OOF-предсказания нейросети (softmax-вероятности), которые объединяются с простыми статистическими (handcrafted) признаками и подаются в CatBoost-классификатор в рамках стэкинга

**accuracy в kaggle: 0.916**

In [25]:
!pip install catboost



In [26]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from catboost import CatBoostClassifier
from tqdm import tqdm

 Загрузка и подготовка данных

In [27]:
# Read the competition tables from the working directory.
train_df, test_df = pd.read_csv('train.csv'), pd.read_csv('test.csv')

# Integer-encode the gesture labels; the encoder is reused to decode predictions.
le = LabelEncoder()
le.fit(train_df['gesture'])
train_df['gesture_encoded'] = le.transform(train_df['gesture'])
num_classes = len(le.classes_)

# Sensor columns fed to the models, and the fixed sequence length after padding.
FEATURES = [
    'acc_x', 'acc_y', 'acc_z',
    'rot_w', 'rot_x', 'rot_y', 'rot_z',
]
MAX_LEN = 128

In [28]:
def group_sequences(df, with_labels=True):
    """Split a long-format sensor table into one float32 array per sequence.

    Args:
        df: DataFrame with a ``sequence_id`` column and the ``FEATURES``
            columns; ``gesture_encoded`` is also required when
            ``with_labels`` is true.
        with_labels: if true, values are ``(X, y)`` with ``y`` read from the
            first row of the sequence; otherwise values are ``X`` alone.

    Returns:
        dict mapping ``sequence_id`` to ``X`` or ``(X, y)``.
    """
    sequences = {}
    for seq_id, rows in df.groupby('sequence_id'):
        features = rows[FEATURES].to_numpy().astype(np.float32)
        if not with_labels:
            sequences[seq_id] = features
            continue
        sequences[seq_id] = (features, rows['gesture_encoded'].iloc[0])
    return sequences

In [29]:
def pad(X):
    """Truncate or zero-pad ``X`` (shape ``(n_steps, n_features)``) to MAX_LEN rows.

    Sequences with at least MAX_LEN rows are cut to their first MAX_LEN rows;
    shorter ones get float32 zero rows appended at the end.
    """
    missing = MAX_LEN - len(X)
    if missing <= 0:
        return X[:MAX_LEN]
    zero_rows = np.zeros((missing, X.shape[1]), np.float32)
    return np.vstack([X, zero_rows])

In [30]:
# Per-sequence arrays: train values are (X, y) pairs, test values are X only.
train_seq, test_seq = (
    group_sequences(train_df, True),
    group_sequences(test_df, False),
)

In [31]:
# Pad every training sequence; features, labels and ids stay aligned because
# all three are derived from the same id order (dict insertion order).
train_ids = list(train_seq)
X_train = [pad(train_seq[sid][0]) for sid in train_ids]
y_train = [train_seq[sid][1] for sid in train_ids]

# Same for the unlabeled test sequences.
test_ids = list(test_seq)
X_test = [pad(test_seq[sid]) for sid in test_ids]

X_train = torch.tensor(np.stack(X_train))
# Explicit int64 targets: nn.CrossEntropyLoss requires Long class indices, and
# relying on dtype inference would break if the encoded labels were not int64.
y_train = torch.tensor(y_train, dtype=torch.long)
X_test  = torch.tensor(np.stack(X_test))

Handcrafted признаки дополнительно к имеющимся

In [32]:
def handcrafted(X):
    """Return per-feature summary statistics of a sequence as one flat vector.

    Args:
        X: array of shape ``(n_steps, n_features)``.

    Returns:
        1-D array of length ``4 * n_features`` laid out as
        ``[mean..., std..., min..., max...]``, each reduced over axis 0.
    """
    reducers = ('mean', 'std', 'min', 'max')
    return np.concatenate([getattr(X, name)(0) for name in reducers])

In [33]:
# One row of summary statistics per sequence.
# NOTE(review): X_train / X_test are the padded tensors, so the appended zero
# rows take part in mean/std/min/max — confirm this is intended.
hc_train = np.stack([handcrafted(seq) for seq in X_train.numpy()])
hc_test = np.stack([handcrafted(seq) for seq in X_test.numpy()])

Attention, который должен помочь в выделении ключевых признаков

In [34]:
class Attention(nn.Module):
    """Additive-score temporal pooling.

    A single linear layer scores every time step; the scores are normalised
    with a softmax over the time axis and used as weights for a weighted sum
    of the inputs.

    Args:
        hidden_dim: size of the last dimension of the input.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.attn = nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        """Pool ``x`` of shape (B, T, H) into (B, H)."""
        scores = self.attn(x)                    # (B, T, 1)
        weights = torch.softmax(scores, dim=1)   # normalise across the T axis
        return (weights * x).sum(dim=1)          # (B, H)

CNN + LSTM + Attention

In [35]:
class CNN_LSTM_Attn(nn.Module):
    """CNN branch + bidirectional LSTM with attention pooling, fused linearly.

    The CNN branch (two convolutions and global max pooling over time) gives a
    128-dim vector. The bidirectional LSTM (64 units per direction) produces
    128-dim outputs per time step, which ``Attention`` pools into a 128-dim
    vector. The two vectors are concatenated and mapped to class logits.

    Args:
        num_classes: number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Convolutional branch over the 7 input channels.
        self.conv1 = nn.Conv1d(in_channels=7, out_channels=64, kernel_size=5, padding=2)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)

        # Recurrent branch: 2 directions x 64 hidden units = 128 features per step.
        self.lstm = nn.LSTM(input_size=7, hidden_size=64, batch_first=True, bidirectional=True)
        self.attn = Attention(128)

        self.fc = nn.Linear(in_features=128 + 128, out_features=num_classes)

    def forward(self, x):
        """Compute class logits for ``x`` of shape (batch, time, 7)."""
        # CNN branch: Conv1d expects (batch, channels, time).
        channels_first = x.transpose(1, 2)
        conv_out = torch.relu(self.conv1(channels_first))
        conv_out = torch.relu(self.conv2(conv_out))
        conv_features = self.pool(conv_out).squeeze(-1)

        # LSTM branch: attention-pool the per-step outputs.
        step_outputs, _ = self.lstm(x)
        attended = self.attn(step_outputs)

        fused = torch.cat((conv_features, attended), dim=1)
        return self.fc(fused)

# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

OOF NN предсказания

In [36]:
# 5-fold stratified split with a fixed seed for the fold assignment.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Buffers for class probabilities: out-of-fold rows for train,
# fold-averaged rows for test.
n_train, n_test = len(X_train), len(X_test)
oof_nn = np.zeros((n_train, num_classes))
test_nn = np.zeros((n_test, num_classes))

In [37]:
# Cross-validated training: for each fold a fresh network is trained on the
# training part, then used to fill the out-of-fold (OOF) probabilities of the
# held-out part and to add its share of the averaged test probabilities.
for fold, (tr, val) in enumerate(skf.split(X_train, y_train)):
    print(f'\nFold {fold+1}')

    # New model, optimizer and loss per fold so no weights leak between folds.
    model = CNN_LSTM_Attn(num_classes).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()

    # Positional DataLoader arguments are (dataset, batch_size, shuffle).
    tr_loader = DataLoader(TensorDataset(X_train[tr], y_train[tr]), 32, True)
    val_loader = DataLoader(TensorDataset(X_train[val], y_train[val]), 64, False)
    test_loader = DataLoader(TensorDataset(X_test), 64, False)

    # Fixed budget of 20 epochs; no early stopping or validation monitoring.
    model.train()
    for epoch in range(20):
        for xb, yb in tr_loader:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            loss = loss_fn(model(xb), yb)
            loss.backward()
            opt.step()

    model.eval()
    with torch.no_grad():
        # OOF: val_loader is unshuffled, so batch rows map onto consecutive
        # slices of the `val` index array.
        idx = 0
        for xb, yb in val_loader:
            xb = xb.to(device)
            probs = torch.softmax(model(xb),1).cpu().numpy()
            oof_nn[val[idx:idx+len(probs)]] = probs
            idx += len(probs)

        # TEST: accumulate this fold's probabilities, pre-divided by the number
        # of folds so that test_nn ends up as the mean over all folds.
        fold_test = []
        for xb, in test_loader:
            xb = xb.to(device)
            fold_test.append(torch.softmax(model(xb),1).cpu().numpy())
        test_nn += np.vstack(fold_test) / skf.n_splits


Fold 1

Fold 2

Fold 3

Fold 4

Fold 5


CatBoost stacking

In [38]:
# Meta-features: network class probabilities followed by the handcrafted statistics.
meta_train = np.concatenate([oof_nn, hc_train], axis=1)
meta_test = np.concatenate([test_nn, hc_test], axis=1)

# Second-level model trained on the meta-features.
catboost_params = {
    'iterations': 2000,
    'depth': 8,
    'learning_rate': 0.03,
    'loss_function': 'MultiClass',
    'eval_metric': 'Accuracy',
    'random_seed': 42,
    'verbose': 200,
}
cat = CatBoostClassifier(**catboost_params)
cat.fit(meta_train, y_train.numpy())

# Flatten the predictions into a 1-D integer vector of class indices.
raw_pred = cat.predict(meta_test)
test_pred = raw_pred.astype(int).flatten()

0:	learn: 0.7764706	total: 90.7ms	remaining: 3m 1s
200:	learn: 0.9921569	total: 27.4s	remaining: 4m 4s
400:	learn: 1.0000000	total: 45.1s	remaining: 2m 59s
600:	learn: 1.0000000	total: 1m 3s	remaining: 2m 27s
800:	learn: 1.0000000	total: 1m 20s	remaining: 2m
1000:	learn: 1.0000000	total: 1m 38s	remaining: 1m 38s
1200:	learn: 1.0000000	total: 1m 57s	remaining: 1m 18s
1400:	learn: 1.0000000	total: 2m 15s	remaining: 58.1s
1600:	learn: 1.0000000	total: 2m 33s	remaining: 38.3s
1800:	learn: 1.0000000	total: 2m 51s	remaining: 18.9s
1999:	learn: 1.0000000	total: 3m 9s	remaining: 0us


Сабмит

In [39]:
# Fill the sample submission with decoded gesture labels and write it out.
sub = pd.read_csv('sample_submission.csv')
pred_map = dict(zip(test_ids, test_pred))
# Fallback for ids without a prediction: the most frequent training class.
default = np.bincount(y_train.numpy()).argmax()

# Look up every requested sequence in one pass (missing ids become NaN and
# are replaced by the fallback), then decode all indices with a single
# inverse_transform call instead of one call per row.
encoded_preds = sub['sequence_id'].map(pred_map).fillna(default).astype('int64')
sub['gesture'] = le.inverse_transform(encoded_preds.to_numpy())

sub.to_csv('submission_catboost_attention.csv', index=False)
print('Saved submission_catboost_attention.csv')

Saved submission_catboost_attention.csv
