In [1]:
import os
import joblib
from joblib import Parallel, delayed
import time
import pickle

import autograd.numpy as np
import autograd.numpy.random as npr
npr.seed(0)

from hmmlearn import hmm
import matplotlib.pyplot as plt
import numpy as np
from tkinter import *
from tkinter.filedialog import askopenfilename, askdirectory
from tqdm import tqdm
import scipy.stats as stats

In [2]:
# Binned spike data for the M1 recording (file name suggests 0.1 s non-overlapping bins, 0-7200 s).
filename = '../processed_data/m1_fr0.1hz_30hz_0s_7200s_0.1s_bins_nooverlap.sav'

# Load, convert to a numpy array and transpose in one step.
# Author's note: the transposed array has shape (71999, 90), i.e. (time bins, neurons).
with open(filename, 'rb') as file:
    spike_times_all_neurons = np.array(joblib.load(file)).T

# Keep the first 36000 bins (one hour at 0.1 s per bin) of the first 46 columns -> (36000, 46).
# Author's note: columns 0-45 are L5/6 and columns 46-89 are L2/3, with 89 being the most shallow.
spike_times_l5_neurons_1h = spike_times_all_neurons[:36000, :46]

# Z-score along axis 0, so every column (neuron) has zero mean and unit variance across time.
spike_times_l5_neurons_1h_zscored = stats.zscore(spike_times_l5_neurons_1h, axis=0)

data = spike_times_l5_neurons_1h_zscored
print(f'Spike times are binned into shape for hmm: {data.shape}')

Spike times are binned into shape for hmm: (36000, 46)


In [3]:
# Directory (relative to the notebook) that holds the pickled `.hmm` model files.
model_dir = "../hmm_models/"

In [4]:
# Latent-state count; interpolated into the model file names as `..._{K}_latents.hmm`.
K = 16

In [5]:
# Load model
hmm_mrg = pickle.load(open(os.path.join(model_dir, f"1h_l5_possion_merged_{K}_latents" + ".hmm"), "rb"))
print(f'1h_l5_possion_merged_{K}_latents.hmm loaded.')

1h_l5_possion_merged_16_latents.hmm loaded.


In [6]:
# Score the z-scored data under the merged model
# (presumably a log-likelihood -- confirm against the model class).
hmm_mrg.score(data)

537309.4751941374

In [7]:
# Load model
hmm_dir = pickle.load(open(os.path.join(model_dir, f"1h_l5_possion_direct_{K}_latents" + ".hmm"), "rb"))
print(f'1h_l5_possion_direct_{K}_latents.hmm loaded.')

1h_l5_possion_direct_16_latents.hmm loaded.


In [8]:
# Score the same z-scored data under the direct model
# (presumably a log-likelihood -- confirm against the model class).
hmm_dir.score(data)

209182.51092923767

## Use a random forest to measure how distinct the clusters (states) assigned by each HMM are

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

def rf_f1_score(hmm_model, data, limit_count=100, verbose=False):
    """Print how well a random forest can recover the HMM's state assignments.

    The states predicted by ``hmm_model`` for ``data`` are used as class
    labels. Each state is subsampled to at most ``limit_count`` observations,
    the subsample is split 80/20 into train/test sets, and a random forest is
    fit on the training part. The macro-averaged and per-state F1 scores on
    the test part are printed.

    Parameters
    ----------
    hmm_model : object
        Model exposing ``predict(data)`` that returns one state label per row.
    data : array-like
        2-D observations, one row per sample (indexed as ``X[rows, :]``).
    limit_count : int, default 100
        Maximum number of observations drawn without replacement per state.
        A state with fewer observations contributes all of them. Must be a
        non-negative integer.
    verbose : bool, default False
        If True, also print the array shapes and the per-state counts.

    Returns
    -------
    None
        The scores are printed, not returned.

    Notes
    -----
    NumPy's global RNG is reseeded with 2022 on every call so the subsampling
    is reproducible; this also resets the global random state for the caller.
    """
    X = np.asarray(data)
    y = np.asarray(hmm_model.predict(data))
    states = np.unique(y)
    if verbose:
        print(X.shape, y.shape, [len(np.where(y == s)[0]) for s in states])

    np.random.seed(2022)
    X_parts = []
    y_parts = []
    for s in states:
        idx_s = np.where(y == s)[0]
        # Cap the draw at the class size explicitly instead of catching the
        # ValueError that np.random.choice raises for an oversized sample.
        n_draw = min(limit_count, len(idx_s))
        picked = idx_s[np.random.choice(len(idx_s), n_draw, replace=False)]
        y_parts.append(y[picked])
        X_parts.append(X[picked, :])

    X_subsampled = np.vstack(X_parts)
    y_subsampled = np.hstack(y_parts)
    if verbose:
        print(y_subsampled.shape, X_subsampled.shape)

    X_train, X_test, y_train, y_test = train_test_split(
        X_subsampled, y_subsampled, test_size=0.2, random_state=42)
    clf = RandomForestClassifier(n_estimators=200, random_state=42,
                                 class_weight='balanced_subsample', n_jobs=-1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print(50*'=')
    print(f'average: {f1_score(y_test, y_pred, average="macro")}')
    print(50*'-')
    print(f'indiv:{f1_score(y_test, y_pred, average=None)}')
    print(50*'=')

## Direct model's best performance (`limit_count=1250`), with the merged model at the same setting for comparison

In [10]:
# Direct model, with at most 1250 samples drawn per predicted state.
rf_f1_score(hmm_dir, data, limit_count=1250, verbose=False)

average: 0.8222432682875296
--------------------------------------------------
indiv:[0.31578947 0.96862745 0.95145631 0.86307054 0.96280992 0.703125
 0.83916084 0.88328076 0.86556169 0.87356322 0.90909091 0.86680761
 0.75621891 0.94056848 0.8685259  0.58823529]


In [11]:
# Merged model at the same cap (1250 samples per predicted state) for comparison.
rf_f1_score(hmm_mrg, data, limit_count=1250, verbose=False)

average: 0.8953592845582208
--------------------------------------------------
indiv:[0.96732026 0.93574297 0.8981289  0.8677686  0.85375494 0.944
 0.9043152  0.879046   0.85591398 0.87712665 0.81725888 0.94653465
 0.88888889 0.88076923 0.88209607 0.92708333]


## Merged model's best performance (`limit_count=1360`), with the direct model at the same setting for comparison

In [12]:
# Merged model, with at most 1360 samples drawn per predicted state.
rf_f1_score(hmm_mrg, data, limit_count=1360, verbose=False)

average: 0.9000353583987244
--------------------------------------------------
indiv:[0.94197952 0.94074074 0.89795918 0.89579525 0.82142857 0.95378928
 0.9165247  0.90625    0.85714286 0.85869565 0.8        0.94639556
 0.92334495 0.9        0.88073394 0.95978552]


In [13]:
# Direct model at the same cap (1360 samples per predicted state) for comparison.
rf_f1_score(hmm_dir, data, limit_count=1360, verbose=False)

average: 0.8109200688711339
--------------------------------------------------
indiv:[0.35294118 0.9893617  0.94360902 0.80263158 0.95254237 0.66666667
 0.84722222 0.89570552 0.86779661 0.86880466 0.91139241 0.84615385
 0.74489796 0.94923858 0.86516854 0.47058824]
