In [None]:
def data_initialize(directory):
	"""Load the trials in ``directory`` and build windowed train/test arrays.

	Every trial returned by ``load_dataset`` is downsampled, cut into
	overlapping fixed-length windows (advancing one sampled row at a time),
	and each window is paired with the label of the trial it came from. The
	pooled windows are shuffled, split into train/test portions according to
	``train_rate``, stacked into 3-D arrays, and the labels are passed
	through ``to_categorical``.

	Args:
		directory: Passed straight to ``load_dataset``; presumably the folder
			holding the recorded trials (confirm against ``load_dataset``).

	Returns:
		Not visible in this part of the file -- TODO confirm what is handed
		back to callers.

	Raises:
		AssertionError: If the window/label lists differ in length, if the
			labels are not laid out as thirds of 0s, 1s and 2s, or if the
			train/test split does not cover every window.
	"""
	# NOTE(review): the meaning of 36000 is not visible here (presumably a
	# per-trial row count) -- confirm against load_dataset.
	X, Y = load_dataset(directory, 36000)

	## Sample Run Parameters
	sample_period = 20  # downsample 1 out of every 20 time steps, 1000 Hz (original) => 50 Hz (sampled)
	window_size = 50  # 1 second of data
	train_rate = 0.8  # fraction of the shuffled windows used for training

	X_1s = []
	Y_1s = []
	trial_idx = 0
	X_1s_predict = []  # used for computing maneuver predictions from trained model
	for x in X:
		x_1s_predict = []
		# Downsampling and removing the time column (first column) data.
		x_sampled = x.iloc[::sample_period, 1:]
		# Creating sliding windows: window t covers sampled rows
		# t-(window_size-1) .. t inclusive, i.e. exactly window_size rows.
		for t in range(window_size - 1, x_sampled.shape[0]):
			x_window = x_sampled.iloc[t - (window_size - 1):t + 1, :]
			X_1s.append(x_window)
			Y_1s.append(Y[trial_idx])  # storing correct maneuver label in larger Y_1s list
			x_1s_predict.append(x_window)  # keeping data for each trial separate in X_1s_predict list
		X_1s_predict.append(x_1s_predict)
		trial_idx += 1

	# note, using iloc above to index pandas dataframe as described here:
	# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html

	assert len(X_1s) == len(Y_1s)
	num_total_samples = len(X_1s)

	# Checking indexing on Y labels: the first third of the windows must all be
	# labelled 0, the second third 1, and the remainder 2.
	length_third = int(len(Y_1s) / 3)
	assert Y_1s[0:length_third].count(0) == len(Y_1s[0:length_third])
	assert Y_1s[length_third:2 * length_third].count(1) == len(Y_1s[length_third:2 * length_third])
	assert Y_1s[2 * length_third:].count(2) == len(Y_1s[2 * length_third:])

	# Shuffle windows and labels together so the pairs stay aligned.
	# https://stackoverflow.com/questions/11765061/better-way-to-shuffle-two-related-lists
	X_1s_shuffled, Y_1s_shuffled = shuffle(X_1s, Y_1s)

	# Single cut point shared by X and Y keeps the two splits aligned.
	split_idx = int(train_rate * num_total_samples)
	X_train = X_1s_shuffled[0:split_idx]
	Y_train = Y_1s_shuffled[0:split_idx]
	X_test = X_1s_shuffled[split_idx:]
	Y_test = Y_1s_shuffled[split_idx:]
	assert len(X_train) + len(X_test) == num_total_samples
	assert len(Y_train) + len(Y_test) == num_total_samples

	# Stack the per-window DataFrames into (num_windows, rows, columns) arrays;
	# labels become a column vector before being handed to to_categorical.
	X_train = np.array(X_train).reshape(len(X_train), X_train[0].shape[0], X_train[0].shape[1])
	X_test = np.array(X_test).reshape(len(X_test), X_test[0].shape[0], X_test[0].shape[1])
	Y_train = to_categorical(np.array(Y_train).reshape(len(Y_train), 1))
	Y_test = to_categorical(np.array(Y_test).reshape(len(Y_test), 1))