In [1]:
# Colab-only setup: mount Google Drive into the VM (prompts for an OAuth code).
from google.colab import drive
drive.mount('/content/drive')
# Work from the project folder on Drive; the "./Data/..." paths used by the
# cells below are relative to this directory.
%cd drive/My\ Drive/36490

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
/content/drive/My Drive/36490


In [0]:
import numpy as np
import pickle as pkl
import datetime as dt
import matplotlib.pyplot as plt

In [0]:
'''Samples to Training/Testing Data Functions'''
def shuffle(X, y):
    """Apply one random row permutation to X and y together.

    y is converted to a NumPy array first; X is indexed as given, so it
    must already support integer-array indexing along its first axis.
    The permutation comes from the global NumPy RNG (np.random).

    Returns the pair (X_permuted, y_permuted), with y as an ndarray.
    """
    labels = np.array(y)
    n_rows = labels.shape[0]
    order = np.random.permutation(n_rows)
    shuffled_X = X[order]
    shuffled_y = labels[order]
    return shuffled_X, shuffled_y

def samples_2_data(ldf, lbls, ratio):
    """Split per-class sample arrays into shuffled train/test sets.

    Parameters
    ----------
    ldf : sequence of ndarray
        One array per class; rows are samples.
    lbls : sequence
        lbls[i] is the label assigned to every row of ldf[i].
    ratio : float
        Fraction of each array's rows that goes to the training set.
        The first int(ratio * n) rows of an n-row array are used for
        training and the remaining rows for testing.

    Returns
    -------
    X_train, y_train, X_test, y_test
        Train and test parts, each shuffled independently with shuffle().

    Raises
    ------
    ValueError
        If ldf is empty.
    """
    if len(ldf) == 0:
        raise ValueError("samples_2_data needs at least one sample array")

    # Collect the pieces and join them once at the end. The previous version
    # used "y_train is still empty" to detect the first iteration, so when an
    # array contributed zero training rows (ratio == 0, or a very small array)
    # the next iteration overwrote X_test / y_test and silently dropped rows.
    train_parts, test_parts = [], []
    y_train, y_test = [], []
    for i in range(len(ldf)):
        samples, label = ldf[i], lbls[i]
        n = samples.shape[0]
        cut = int(ratio * n)
        train_parts.append(samples[:cut])
        test_parts.append(samples[cut:])
        y_train.extend([label] * cut)
        y_test.extend([label] * (n - cut))

    X_train, y_train = shuffle(np.concatenate(train_parts, axis=0), y_train)
    X_test, y_test = shuffle(np.concatenate(test_parts, axis=0), y_test)
    return X_train, y_train, X_test, y_test

def no_neg(x):
    """Return the element-wise absolute value of x."""
    magnitude = np.absolute(x)
    return magnitude

def unpack_sample(s):
    """Flatten every array produced by the iterable s and stack the results.

    s is consumed exactly once (it may be a lazy iterator such as a map
    object). Each item is flattened to 1-D and the flattened items are
    gathered into a single ndarray, one item per row.
    """
    return np.asarray([np.ndarray.flatten(block) for block in s])

In [0]:
'''Important Variable Initializations'''
### Sampling sizes (passed to simData by the generation cells below)
# Points drawn around each sampled center.
n_osc = 15
# Centers drawn per class for the pretraining set.
n_samples = 10000
# Centers drawn per class for the canonical and reverse blocks.
n_acc_samples = 250

In [0]:
'''Creating Data Given Distributions'''
def simData(n_osc, n_samples, mu_b, mu_p, sigma_b, sigma_p, sigma_osc, train_ratio = 0.75):
    """Simulate a two-class data set (classes "b" and "p") and split it.

    For each class, n_samples centers are drawn from a multivariate normal
    (mu_*, sigma_*) and passed through no_neg. Around every center, n_osc
    points are then drawn from a multivariate normal with covariance
    sigma_osc, and each such group of points is flattened into one row by
    unpack_sample. Class b rows are labelled 1 and class p rows 0.

    All draws use the global NumPy RNG in a fixed order: b centers,
    p centers, b points, p points.

    Returns X_tr, y_tr, X_te, y_te as produced by samples_2_data with the
    given train_ratio.
    """
    def _centers(mu, sigma):
        # Draw the raw centers, then fold negatives back via no_neg (row by row).
        raw = np.random.multivariate_normal(mu, sigma, size=[n_samples,])
        return np.asarray([no_neg(row) for row in raw])

    def _rows_around(centers):
        # The generator is consumed inside unpack_sample, so the per-center
        # draws happen there, one center after another.
        return unpack_sample(
            np.random.multivariate_normal(center, sigma_osc, size=[n_osc, ])
            for center in centers
        )

    centers_b = _centers(mu_b, sigma_b)
    centers_p = _centers(mu_p, sigma_p)
    rows_b = _rows_around(centers_b)
    rows_p = _rows_around(centers_p)

    # Label b as 1 and p as 0, then split/shuffle.
    X_train, y_train, X_test, y_test = samples_2_data([rows_b, rows_p], [1, 0], train_ratio)
    return X_train, y_train, X_test, y_test

In [0]:
### Canonical block parameters (slow-pathway pretraining and the canonical block)
# Class means for the b and p center distributions.
mu_b = np.array([0.25, 0.2])
mu_p = np.array([0.75, 0.8])
# Diagonal covariances of the center distributions (same values for both classes).
sigma_b = np.diag([0.006, 0.016])
sigma_p = np.diag([0.006, 0.016])
# Covariance of the points drawn around each center.
sigma_osc = np.diag([0.04, 0.04])
# Output pickles for the pretraining and canonical sets.
pre_file = "./Data/pretrain.pkl"
canon_file = "./Data/canonical.pkl"

### Reverse block parameters (accent for normal training)
# NOTE(review): mu_rev_b / mu_rev_p are not referenced by any generation cell
# visible in this notebook - confirm whether the reverse cells should use them.
mu_rev_b = np.array([0.25, 0.8])
mu_rev_p = np.array([0.75, 0.2])
# Output pickles for the two reverse sets.
rev_file = "./Data/rev1.pkl"
rev2_file = "./Data/rev2.pkl"

In [0]:
# Pretraining set: n_samples centers per class drawn from the canonical
# distributions, generated under a fixed seed.
np.random.seed(10)
pr_data = simData(n_osc, n_samples, mu_b, mu_p, sigma_b, sigma_p, sigma_osc)
# Use a context manager so the pickle file is flushed and closed right away
# instead of leaving an open handle behind in the kernel.
with open(pre_file, "wb") as out_file:
    pkl.dump(pr_data, out_file)

In [0]:
# Canonical training set: n_acc_samples centers per class drawn from the
# canonical distributions, generated under a fixed seed.
np.random.seed(10000000)
canon_data = simData(n_osc, n_acc_samples, mu_b, mu_p, sigma_b, sigma_p, sigma_osc)
# Use a context manager so the pickle file is flushed and closed right away
# instead of leaving an open handle behind in the kernel.
with open(canon_file, "wb") as out_file:
    pkl.dump(canon_data, out_file)

In [0]:
# Reverse training set 1: same generator as the canonical set, but the two
# class means are passed in swapped order (mu_p for class b, mu_b for class p).
# NOTE(review): mu_rev_b / mu_rev_p are defined in the parameter cell but are
# not used here - confirm that swapping mu_b and mu_p is the intended reversal.
np.random.seed(20000000)
r1_data = simData(n_osc, n_acc_samples, mu_p, mu_b, sigma_b, sigma_p, sigma_osc)
# Use a context manager so the pickle file is flushed and closed right away
# instead of leaving an open handle behind in the kernel.
with open(rev_file, "wb") as out_file:
    pkl.dump(r1_data, out_file)

In [0]:
# Reverse training set 2: identical setup to reverse set 1 (class means passed
# in swapped order) but generated under a different seed.
# NOTE(review): mu_rev_b / mu_rev_p are defined in the parameter cell but are
# not used here - confirm that swapping mu_b and mu_p is the intended reversal.
np.random.seed(30000000)
r2_data = simData(n_osc, n_acc_samples, mu_p, mu_b, sigma_b, sigma_p, sigma_osc)
# Use a context manager so the pickle file is flushed and closed right away
# instead of leaving an open handle behind in the kernel.
with open(rev2_file, "wb") as out_file:
    pkl.dump(r2_data, out_file)

In [0]:
#Edge Case Testing
# Two test sets whose first coordinate is fixed at 0.5 (zero variance) and
# whose second coordinate is centred at 0.2 ("short") or 0.8 ("long").

# Seed explicitly, like every other generation cell, so this cell gives the
# same data when run on its own rather than depending on leftover RNG state.
np.random.seed(40000000)

mu_short = np.array([0.5, 0.2])
mu_long = np.array([0.5, 0.8])
sigma_short = np.array([[0, 0], [0, 0.016]])
sigma_long = np.array([[0, 0], [0, 0.016]])
# Kept under its own name: rebinding the shared sigma_osc here would make any
# earlier generation cell use this degenerate matrix if it were re-run later.
sigma_osc_edge = np.array([[0, 0], [0, 0.04]])

# Both classes get the same distribution and train_ratio = 1, so every row
# lands in the training split; labels and the (empty) test split are ignored.
test_short = simData(n_osc, 500, mu_short, mu_short, sigma_short, sigma_short, sigma_osc_edge, train_ratio = 1)
test_long = simData(n_osc, 500, mu_long, mu_long, sigma_long, sigma_long, sigma_osc_edge, train_ratio = 1)

# Only element 0 (the training feature matrix) is saved. Context managers
# make sure each pickle file is closed before "Pickled" is printed.
with open("./Data/low_d2_test.pkl", "wb") as out_file:
    pkl.dump(test_short[0], out_file)
print("Pickled")
with open("./Data/high_d2_test.pkl", "wb") as out_file:
    pkl.dump(test_long[0], out_file)
print("Pickled")

Pickled
Pickled
