In [1]:
import pandas as pd
import numpy as np
import pickle
import markovify
import os

# Hidden Markov Model development

This notebook attempts to develop a hidden Markov model (HMM) that generates realistic beat map sequences. Goals include:
- Process note data into "words" that can be read by markovify
- Train HMMs for each difficulty level
- Generate new sequences

In [9]:
# Load one pre-processed level frame to explore its structure.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
level_path = '../level_df/17e9_expert.pkl'
with open(level_path, 'rb') as level_file:
    df = pickle.load(level_file)

In [10]:
# Display the loaded level frame (the notebook renders the cell's last expression).
df

Unnamed: 0,_time,0,1,2,3,4,5,6,7,8,...,notes_type_1,notes_cutDirection_1,notes_lineIndex_3,notes_lineLayer_3,notes_type_3,notes_cutDirection_3,notes_lineIndex_0,notes_lineLayer_0,notes_type_0,notes_cutDirection_0
0,0.000,0.215631,0.283293,0.044722,0.314827,0.456092,0.184625,0.258735,0.500755,0.410239,...,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
1,1.000,0.092759,0.181363,0.310615,0.202370,0.113355,0.235167,0.532198,1.000000,0.499534,...,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
2,2.000,0.214088,0.358335,0.483347,0.330122,0.195144,0.252304,0.212993,0.173566,0.297780,...,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
3,3.000,1.000000,0.785022,0.325506,0.198619,0.278792,0.230358,0.251965,0.368830,0.383931,...,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
4,3.312,0.160311,0.118914,0.104925,0.111292,0.170624,0.175124,0.141467,0.130013,0.423844,...,1.0,0.0,0.0,2.0,3.0,3.0,1.0,2.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,66.312,,,,,,,,,,...,1.0,0.0,0.0,1.0,3.0,6.0,1.0,0.0,0.0,1.0
172,66.312,,,,,,,,,,...,1.0,0.0,0.0,0.0,3.0,6.0,1.0,0.0,0.0,1.0
173,66.312,,,,,,,,,,...,1.0,0.0,3.0,2.0,3.0,6.0,1.0,0.0,0.0,1.0
174,66.312,,,,,,,,,,...,1.0,0.0,3.0,1.0,3.0,6.0,1.0,0.0,0.0,1.0


In [5]:
# Keep only the columns from position 13 onward; in the frame displayed above
# these are the notes_* columns (the offset is hard-coded — TODO confirm it
# holds for every level file).
# .copy() makes df_notes an independent frame, so the in-place drop and
# reset_index in the following cells do not operate on a slice of `df`
# (which triggers pandas' SettingWithCopyWarning).
df_notes = df.iloc[:, 13:].copy()

In [25]:
# Remove, in place, every row in which all columns equal 999
# (999 appears to be the "no note" placeholder — confirm against the preprocessing step).
all_placeholder = (df_notes == 999).all(axis=1)
df_notes.drop(index=df_notes.index[all_placeholder.to_numpy()], inplace=True)

In [27]:
# Renumber the remaining rows from 0 and discard the old index labels (in place).
df_notes.reset_index(drop = True, inplace = True)

In [39]:
# Turn each row of df_notes into one comma-separated "word" made of the
# type / lineIndex / lineLayer / cutDirection fields of note groups 0, 1 and 3.
word_keys = [
    'notes_type_0', 'notes_lineIndex_0', 'notes_lineLayer_0', 'notes_cutDirection_0',
    'notes_type_1', 'notes_lineIndex_1', 'notes_lineLayer_1', 'notes_cutDirection_1',
    'notes_type_3', 'notes_lineIndex_3', 'notes_lineLayer_3', 'notes_cutDirection_3',
]
group_3_keys = ('notes_type_3', 'notes_lineIndex_3', 'notes_lineLayer_3', 'notes_cutDirection_3')

seq = []
for _, row in df_notes.iterrows():
    values = {column: int(row[column]) for column in df_notes.columns}
    # Frames without a group-3 type column get that whole group filled with 999.
    if 'notes_type_3' not in values:
        for key in group_3_keys:
            values[key] = 999
    seq.append(",".join(str(values[key]) for key in word_keys))

In [11]:
def make_sequence(df):
    """Return a sequence of 'words' describing block placement and type for the HMM generator.

    Each word is a comma-separated string of 12 integers: the type, lineIndex,
    lineLayer and cutDirection fields of note groups 0, 1 and 3, in that order.

    Parameters
    ----------
    df : pandas.DataFrame
        Level frame whose columns from position 13 onward are the ``notes_*``
        columns. The frame is not modified.

    Returns
    -------
    list of str
        One word per row that is not entirely 999 in the note columns.

    Raises
    ------
    KeyError
        If a group's ``notes_type_*`` column is present but one of its other
        three fields is missing.
    ValueError
        If a note field cannot be converted to an integer (e.g. NaN).
    """
    groups = ('0', '1', '3')
    fields = ('type', 'lineIndex', 'lineLayer', 'cutDirection')
    word_columns = [f"notes_{field}_{group}" for group in groups for field in fields]

    notes = df.iloc[:, 13:]
    # Filter into a new frame instead of dropping in place on a slice of the
    # caller's DataFrame (which triggers SettingWithCopyWarning).
    keep = ~(notes == 999).all(axis=1)
    notes = notes.loc[keep].reset_index(drop=True)

    # Fill every missing group independently. The previous if/elif chain only
    # filled the first missing group, so a frame lacking two groups raised
    # KeyError when the word was assembled.
    for group in groups:
        if f"notes_type_{group}" not in notes.columns:
            for field in fields:
                notes[f"notes_{field}_{group}"] = 999

    ordered = notes[word_columns].astype(int)
    return [",".join(map(str, row)) for row in ordered.itertuples(index=False, name=None)]

In [4]:
def generate_corpus(difficulty):
    """Collect the word sequences of every pickled level of one difficulty.

    Scans ``../level_df`` for files whose name ends with ``{difficulty}.pkl``,
    unpickles each one and converts it with ``make_sequence``.

    Returns a list holding one sequence (list of words) per matching file,
    in the order ``os.listdir`` yields them.
    """
    suffix = f"{difficulty}.pkl"
    sequences = []
    for name in os.listdir('../level_df'):
        if not name.endswith(suffix):
            continue
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(f"../level_df/{name}", 'rb') as handle:
            level = pickle.load(handle)
        sequences.append(make_sequence(level))
    return sequences
        

In [5]:
def train_HMM(corpus, state_size=5):
    """Fit a ``markovify.Chain`` on a corpus of word sequences.

    Parameters
    ----------
    corpus : list of list of str
        One sequence of words per level, as produced by ``generate_corpus``.
    state_size : int, optional
        Number of preceding words that make up one chain state. Defaults to 5,
        the value this notebook previously hard-coded.

    Returns
    -------
    markovify.Chain
        The trained chain.
    """
    return markovify.Chain(corpus, state_size)

In [6]:
def HMM(difficulty):
    """Build the corpus for ``difficulty`` and return the chain trained on it."""
    return train_HMM(generate_corpus(difficulty))

In [12]:
# Train one chain per difficulty level and pickle each to ../models.
difficulties = ['easy', 'normal', 'hard', 'expert', 'expertPlus']
for difficulty in difficulties:
    MC = HMM(difficulty)
    model_path = f"../models/HMM_{difficulty}_v2.pkl"
    with open(model_path, 'wb') as model_file:
        pickle.dump(MC, model_file)

In [80]:
# Train a chain on the 'hard' levels to experiment with sequence generation.
MC_hard = HMM('hard')

In [82]:
# Generate one sequence of words from the trained chain.
# NOTE(review): no random seed is set in this notebook, so the walk presumably
# differs between runs — confirm and seed if reproducibility matters.
walk = MC_hard.walk()

In [92]:
def walk_to_df(walk):
    """Convert a walk of comma-separated words into a DataFrame.

    Each word is split on "," and becomes one row; the resulting fields are
    labelled with the twelve note column names for groups 0, 1 and 3.
    Field values are kept as strings.
    """
    note_columns = [
        'notes_type_0', 'notes_lineIndex_0', 'notes_lineLayer_0', 'notes_cutDirection_0',
        'notes_type_1', 'notes_lineIndex_1', 'notes_lineLayer_1', 'notes_cutDirection_1',
        'notes_type_3', 'notes_lineIndex_3', 'notes_lineLayer_3', 'notes_cutDirection_3',
    ]
    rows = [word.split(",") for word in walk]
    return pd.DataFrame(rows, columns=note_columns)

In [86]:
# Split every generated word into its individual comma-separated fields.
sequence = [word.split(",") for word in walk]

In [87]:
# Inspect the split walk: one list of string fields per generated step.
sequence

[['999', '999', '999', '999', '1', '3', '1', '3', '999', '999', '999', '999'],
 ['0', '0', '1', '2', '999', '999', '999', '999', '999', '999', '999', '999'],
 ['999', '999', '999', '999', '1', '3', '0', '7', '999', '999', '999', '999'],
 ['0', '2', '0', '7', '999', '999', '999', '999', '999', '999', '999', '999'],
 ['0', '1', '0', '0', '1', '2', '0', '0', '999', '999', '999', '999'],
 ['0', '0', '0', '6', '1', '1', '0', '6', '999', '999', '999', '999'],
 ['0', '1', '0', '0', '1', '2', '0', '0', '999', '999', '999', '999'],
 ['0', '2', '0', '7', '1', '3', '0', '7', '999', '999', '999', '999'],
 ['0', '1', '0', '0', '1', '2', '0', '0', '999', '999', '999', '999'],
 ['0', '0', '1', '2', '1', '3', '1', '3', '999', '999', '999', '999'],
 ['0', '0', '0', '1', '1', '3', '0', '1', '999', '999', '999', '999'],
 ['0', '2', '0', '0', '1', '3', '0', '0', '999', '999', '999', '999'],
 ['0', '1', '0', '6', '1', '2', '0', '6', '999', '999', '999', '999'],
 ['0', '0', '0', '0', '1', '1', '0', '0', '99

In [89]:
# Column names for the split word fields, in the order the words were built:
# type, lineIndex, lineLayer, cutDirection for note groups 0, 1 and 3.
constant = [
    'notes_type_0',
    'notes_lineIndex_0',
    'notes_lineLayer_0',
    'notes_cutDirection_0',
    'notes_type_1',
    'notes_lineIndex_1',
    'notes_lineLayer_1',
    'notes_cutDirection_1',
    'notes_type_3',
    'notes_lineIndex_3',
    'notes_lineLayer_3',
    'notes_cutDirection_3',
]

In [91]:
# Label the split fields with their note column names and display the result.
pd.DataFrame(sequence, columns = constant)

Unnamed: 0,notes_type_0,notes_lineIndex_0,notes_lineLayer_0,notes_cutDirection_0,notes_type_1,notes_lineIndex_1,notes_lineLayer_1,notes_cutDirection_1,notes_type_3,notes_lineIndex_3,notes_lineLayer_3,notes_cutDirection_3
0,999,999,999,999,1,3,1,3,999,999,999,999
1,0,0,1,2,999,999,999,999,999,999,999,999
2,999,999,999,999,1,3,0,7,999,999,999,999
3,0,2,0,7,999,999,999,999,999,999,999,999
4,0,1,0,0,1,2,0,0,999,999,999,999
...,...,...,...,...,...,...,...,...,...,...,...,...
640,0,1,0,1,1,2,0,1,999,999,999,999
641,0,1,0,0,1,2,0,0,999,999,999,999
642,0,0,0,1,1,3,0,1,999,999,999,999
643,0,0,2,0,1,3,2,0,999,999,999,999
