In [35]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, confusion_matrix
import optuna
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
import pickle
import pennylane as qml
from pennylane import numpy as np_qml

In [36]:
df_train = pd.read_pickle('../data/train_data.pickle')

In [37]:
pca_pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('pca', PCA(n_components=2))  
])

cat_cols = ['Sex']
num_cols = ['Age']
timeseries_cols = [x for x in df_train.columns.tolist() if 'timestamp' in x]

In [38]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=False), cat_cols),
        ('pca', pca_pipeline, timeseries_cols),
        ('num', 'passthrough', num_cols)  
    ],
    remainder='drop',  
    verbose_feature_names_out=False 
).set_output(transform="pandas") 

In [39]:
X_train = df_train[cat_cols+num_cols+timeseries_cols]

In [40]:
y_train = df_train.target

In [41]:
X_train_pca = preprocessor.fit_transform(X_train)

In [55]:
df_test = pd.read_pickle('../data/test_data.pickle')

X_test = df_test[num_cols+cat_cols+timeseries_cols]
y_test = df_test.target

In [56]:
X_test_pca = preprocessor.transform(X_test)

In [58]:
n_layers = 2
n_wires = 4

In [65]:
X_train_pca_vals = X_train_pca.values

In [69]:
y_train_vals = y_train.values

In [70]:
X_test_pca_vals = X_test_pca.values

In [71]:
y_test_vals = y_test.values

In [72]:
# → КРИТИЧЕСКИ ВАЖНО: форма weights = (n_layers, n_wires, 3)
weights = np_qml.random.random((n_layers, n_wires, 3), requires_grad=True)
print("✅ weights.shape =", weights.shape)  # ← должно быть (2, 4, 3)

# 3. Устройство и схема — с assert’ами для отладки
dev = qml.device("default.qubit", wires=n_wires)

@qml.qnode(dev, diff_method="backprop", interface="numpy")
def circuit(weights, x):
    # Проверим внутри, что x и weights в правильной форме
    assert x.shape == (n_wires,), f"x shape: {x.shape}, expected ({n_wires},)"
    assert weights.shape == (n_layers, n_wires, 3), f"weights shape: {weights.shape}"
    
    qml.AngleEmbedding(x, wires=range(n_wires), rotation='Y')
    qml.StronglyEntanglingLayers(weights, wires=range(n_wires))
    return qml.expval(qml.PauliZ(0))

# Пробный вызов — должен пройти без ошибок
test_pred = circuit(weights, X_train_pca.iloc[0])
print("✅ circuit test output:", test_pred)

✅ weights.shape = (2, 4, 3)
✅ circuit test output: -0.24161954419121512


  return [rotation(features[i], wires=wires[i]) for i in range(len(wires))]


In [75]:
opt = qml.AdamOptimizer(stepsize=0.05)
batch_size = 8
epochs = 1000

for epoch in range(epochs):
    indices = np.random.permutation(len(X_train))
    total_loss = 0.0

    for i in range(1, len(X_train), batch_size):
        batch_idx = indices[i:i + batch_size]
        X_batch = X_train_pca_vals[batch_idx]
        y_batch = y_train_vals[batch_idx]

        def cost_fn(w):
            preds = np_qml.array([circuit(w, x) for x in X_batch])
            return np_qml.mean((preds - np_qml.array(y_batch)) ** 2)

        # ⚠️ opt.step может "разгладить" форму → принудительно reshape после шага
        new_weights, loss = opt.step_and_cost(cost_fn, weights)
        
        # Восстанавливаем форму, если оптимизатор её нарушил (иногда бывает)
        if new_weights.shape != weights.shape:
            print(f"⚠️  Shape mismatch! Reshaping {new_weights.shape} → {weights.shape}")
            new_weights = np_qml.reshape(new_weights, weights.shape)
        
        weights = new_weights
        total_loss += loss

    # Оценка (как раньше)
    def predict(X):
        preds_cont = np.array([float(circuit(weights, x)) for x in X])
        return (np.sign(preds_cont) + 1) // 2

    bal_acc_train = balanced_accuracy_score((y_train + 1) // 2, predict(X_train_pca_vals))
    bal_acc_test = balanced_accuracy_score((y_test + 1) // 2, predict(X_test_pca_vals))

    if epoch % 50 == 0:
        print(f"Epoch {epoch:2d} | Loss: {total_loss:6.3f} | BalAcc train: {bal_acc_train:.3f} | test: {bal_acc_test:.3f}")

Epoch  0 | Loss: 31.979 | BalAcc train: 0.466 | test: 0.550
Epoch 50 | Loss: 32.139 | BalAcc train: 0.494 | test: 0.543
Epoch 100 | Loss: 32.234 | BalAcc train: 0.468 | test: 0.525
Epoch 150 | Loss: 32.000 | BalAcc train: 0.490 | test: 0.532
Epoch 200 | Loss: 32.344 | BalAcc train: 0.496 | test: 0.481
Epoch 250 | Loss: 31.947 | BalAcc train: 0.490 | test: 0.546
Epoch 300 | Loss: 31.883 | BalAcc train: 0.479 | test: 0.494
Epoch 350 | Loss: 32.014 | BalAcc train: 0.482 | test: 0.543
Epoch 400 | Loss: 32.118 | BalAcc train: 0.469 | test: 0.570
Epoch 450 | Loss: 32.229 | BalAcc train: 0.486 | test: 0.536
Epoch 500 | Loss: 32.289 | BalAcc train: 0.485 | test: 0.536
Epoch 550 | Loss: 32.004 | BalAcc train: 0.489 | test: 0.536
Epoch 600 | Loss: 32.091 | BalAcc train: 0.484 | test: 0.494
Epoch 650 | Loss: 31.981 | BalAcc train: 0.475 | test: 0.585
Epoch 700 | Loss: 32.057 | BalAcc train: 0.476 | test: 0.588
Epoch 750 | Loss: 32.281 | BalAcc train: 0.508 | test: 0.516
Epoch 800 | Loss: 32.144 |