In [2]:
pip install markovify

Collecting markovify
  Downloading markovify-0.9.0.tar.gz (27 kB)
Collecting unidecode
  Downloading Unidecode-1.2.0-py2.py3-none-any.whl (241 kB)
Building wheels for collected packages: markovify
  Building wheel for markovify (setup.py): started
  Building wheel for markovify (setup.py): finished with status 'done'
  Created wheel for markovify: filename=markovify-0.9.0-py3-none-any.whl size=18480 sha256=49d25cf7bda77d9f06053bb47d5dac1545f31b294f73f461dd7de82e636a1919
  Stored in directory: c:\users\hayde\appdata\local\pip\cache\wheels\cf\19\41\0f8707b2305726fadbd92649dcdb28d98a04e159eb24dd72f0
Successfully built markovify
Installing collected packages: unidecode, markovify
Successfully installed markovify-0.9.0 unidecode-1.2.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
import sys
import os
import numpy as np
import pandas as pd
import json
import pickle
from zipfile import ZipFile
import sklearn
from sklearn.model_selection import train_test_split
import markovify as mk

# Make the sibling Map_Processing directory importable (it provides analyze_notes).
# The guard tests the exact path that gets appended, so re-running this cell does not add
# duplicates, and os.path.join keeps the path valid on non-Windows systems as well.
module_path = os.path.abspath(os.path.join('..'))
map_processing_path = os.path.join(module_path, "Map_Processing")
if map_processing_path not in sys.path:
    sys.path.append(map_processing_path)

from analyze_notes import get_notes_as_strings 

In [2]:
# Load the dataframe describing the downloaded maps from its pickle file.
# NOTE(review): unpickling can execute arbitrary code, so this file should only ever be one
# produced by this project's own data-gathering step.
maps_pickle_path = "../Data_Gather_Filter_Download/downloaded_maps_df.pkl"
maps_df = pd.read_pickle(maps_pickle_path)

In [3]:
#==================================== Dataset Settings ====================================#
# Note: We have to use a subset of all our songs as 15k maps would take days to train
total_data_size = 1000  # Number of maps to use across all datasets
val_split = 0.15        # Fraction (0-1) of the data put into the validation set
test_split = 0.15       # Fraction (0-1) of the data put into the testing set
random_seed = 42        # Fixed seed so the same maps land in the same split on every run
#==========================================================================================#

# Combined share of the data that is held out of training (validation + testing)
holdout_split = val_split + test_split

# Split the first `total_data_size` maps into training and a combined val/test hold-out,
# which is split again below
train_data, val_test_data = train_test_split(
    maps_df[:total_data_size], test_size=holdout_split, random_state=random_seed
)

# Split the hold-out into validation and testing sets. test_size here is relative to the
# hold-out, not to the full dataset, hence the division.
val_data, test_data = train_test_split(
    val_test_data, test_size=test_split / holdout_split, random_state=random_seed
)

print("Number of training maps: {}. Numer of validation maps: {}. Number of testing maps: {}".format(len(train_data), len(val_data), len(test_data)))

Number of training maps: 700. Numer of validation maps: 150. Number of testing maps: 150


In [4]:
def load_songs_as_strings(maps, maps_dir="../Data_Gather_Filter_Download"):
    """Read the note data of every map in `maps` and convert it to note strings.

    Args:
        maps: DataFrame with a `file_path` column (path of the map's zip archive relative
            to `maps_dir`, or "NOT_FOUND") and a `difficulty` column.
        maps_dir: Directory that the `file_path` values are relative to.

    Returns:
        A list with one entry per successfully read map; each entry is whatever
        get_notes_as_strings returns for that map's difficulty .dat JSON
        (presumably a list of note strings, one per time point -- confirm in analyze_notes).

    Maps whose archive or difficulty file cannot be read or parsed are reported with
    print() and skipped, so a single bad download does not abort the whole load.
    """
    songs_as_strings = []
    for map_data in maps.itertuples():
        file_path = map_data.file_path
        if file_path == "NOT_FOUND":
            # The map was never downloaded, so there is nothing to read
            continue
        try:
            # Pick the dat file for the difficulty; anything other than 'expert' is
            # treated as Expert+
            if map_data.difficulty == 'expert':
                difficulty_dat = "Expert.dat"
            else:
                difficulty_dat = "ExpertPlus.dat"
            # Opening the archive happens inside the try so that a missing or corrupt zip
            # is reported and skipped exactly like a missing or corrupt .dat member
            with ZipFile("{}/{}".format(maps_dir, file_path)) as folder:
                with folder.open(difficulty_dat) as diff_dat:
                    dat_json = json.load(diff_dat)
            # Get the notes at time points in a song as strings
            songs_as_strings.append(get_notes_as_strings(dat_json))
        except Exception as e:
            print(e, file_path)
    return songs_as_strings


# Get training data into a format that can be read by the model
all_train_songs_as_strings = load_songs_as_strings(train_data)  # One entry per song

In [5]:
%%time
#==================================== Training Settings ===================================#
state_size = 4      # Order of the chain: how many prior notes condition each next note
#==========================================================================================#
# Fit the Markov chain on the per-song note sequences gathered above
markov_chain = mk.Chain(corpus=all_train_songs_as_strings, state_size=state_size)

Wall time: 502 ms


In [6]:
# Sample a sequence of notes from the Markov chain and see how they look.
# Each walk() call generates a complete, independent sequence from scratch; it does not
# extend the previous one. We therefore keep re-sampling until a walk is at least
# `num_notes` long, and cap the number of attempts so the cell cannot loop forever when the
# chain is unable to produce a sequence that long.
num_notes = 100             # Minimum acceptable length of the generated sequence
max_walk_attempts = 10000   # Upper bound on how many walks are sampled before giving up

notes_list = markov_chain.walk()
walk_attempts = 1
while len(notes_list) < num_notes:
    if walk_attempts >= max_walk_attempts:
        raise RuntimeError(
            "No walk with at least {} notes was generated in {} attempts".format(
                num_notes, max_walk_attempts))
    notes_list = markov_chain.walk()  # Discard the short walk and sample a fresh one
    walk_attempts += 1

In [8]:
# Preview the first few generated entries, one per line
preview_count = 5
for position in range(min(preview_count, len(notes_list))):
    print(notes_list[position])

0,1,0,0,9,9,9,9,1,1,0,3,9,9,9,9
0,0,1,0,9,9,9,9,1,0,1,3,9,9,9,9
0,7,0,0,9,9,9,9,1,7,0,3,9,9,9,9
0,2,0,0,9,9,9,9,9,9,9,9,9,9,9,9
0,3,1,0,9,9,9,9,9,9,9,9,9,9,9,9
