In [2]:
pip install markovify

Collecting markovify
  Downloading markovify-0.9.0.tar.gz (27 kB)
Collecting unidecode
  Downloading Unidecode-1.2.0-py2.py3-none-any.whl (241 kB)
Building wheels for collected packages: markovify
  Building wheel for markovify (setup.py): started
  Building wheel for markovify (setup.py): finished with status 'done'
  Created wheel for markovify: filename=markovify-0.9.0-py3-none-any.whl size=18480 sha256=49d25cf7bda77d9f06053bb47d5dac1545f31b294f73f461dd7de82e636a1919
  Stored in directory: c:\users\hayde\appdata\local\pip\cache\wheels\cf\19\41\0f8707b2305726fadbd92649dcdb28d98a04e159eb24dd72f0
Successfully built markovify
Installing collected packages: unidecode, markovify
Successfully installed markovify-0.9.0 unidecode-1.2.0
Note: you may need to restart the kernel to use updated packages.


In [5]:
import sys
import os
import numpy as np
import pandas as pd
import json
import pickle
from zipfile import ZipFile
from collections import OrderedDict
import librosa
import sklearn
from sklearn.model_selection import train_test_split
import markovify as mk

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path+"\\Map_Processing")
    sys.path.append(module_path+"\\Note_Timing")

# import importlib
# importlib.reload(sys.modules['analyze_notes'])
# importlib.reload(sys.modules['onset_detection'])

from analyze_notes import get_notes_as_strings 
from onset_detection import get_onset_times

In [2]:
# Get maps dataframe from the pickle file
maps_df = pd.read_pickle("../Data_Gather_Filter_Download/downloaded_maps_df.pkl")

In [35]:
# Load markov moodel from file if already trained
try:
    with open("markov_model.json", 'r') as markov_file:
        markov_chain_str = json.load(markov_file)
        markov_chain = mk.Chain.from_json(markov_chain_str)
except Exception as e:
    print("Couldn't open markov model from file. Error:", e)

In [86]:
#==================================== Dataset Settings ====================================#
# Note: We have to use a subset of all our songs as 15k maps would take days to train
total_data_size = 15000 # Number of maps to use in across all datasets
val_split = 0.01        # Percentage of data put into validation set
test_split = 0.01       # Percentage of data put into testing set
#==========================================================================================#

# Split our data into training and test/val which we will split again
train_data, val_test_data = train_test_split(maps_df[:total_data_size], test_size=val_split + test_split)

# Split the validation and testing data apart into their own respective sets
val_data, test_data = train_test_split(val_test_data, test_size=(test_split/(val_split + test_split)))

print("Number of training maps: {}. Numer of validation maps: {}. Number of testing maps: {}".format(len(train_data), len(val_data), len(test_data)))

Number of training maps: 14700. Numer of validation maps: 150. Number of testing maps: 150


In [87]:
%%time
# Get training data into format that can be read by model
all_train_songs_as_strings = [] # List of list of strings which represent notes
for map_data in train_data.itertuples():
    file_path = map_data.file_path
    if file_path != "NOT_FOUND":
        with ZipFile("../Data_Gather_Filter_Download/{}".format(file_path)) as folder:
            try:
                # Open the dat file for the difficulty
                if map_data.difficulty == 'expert':
                    difficulty_dat = "Expert.dat"
                else:
                    difficulty_dat = "ExpertPlus.dat"
                with folder.open(difficulty_dat) as diff_dat:
                    dat_json = json.load(diff_dat)
                    # Get the notes at time points in a song as a string 
                    notes_as_strings = get_notes_as_strings(dat_json)
                    all_train_songs_as_strings.append(notes_as_strings)
            except Exception as e:
                print(e, file_path)

Wall time: 8min 52s


In [101]:
%%time
#==================================== Training Settings ===================================#
state_size = 64         # The number of prior notes it will consider with every note
#==========================================================================================#
# Train the Markov Chain
markov_chain = mk.Chain(all_train_songs_as_strings, state_size=state_size)

Wall time: 1min 13s


In [105]:
with open("markov_model_64_state.json", 'w') as f:
    json.dump(markov_chain.to_json(), f)

MemoryError: 

In [3]:
# Returns file path to folder containing all files needed to play song made by model
def get_map_from_song(song_file, output_file_path='Generated_Maps/Expert.dat', start_time=2, bpm=0):
    try:
        with open("markov_model.json", 'r') as markov_file:
            markov_chain_str = json.load(markov_file)
            markov_chain = mk.Chain.from_json(markov_chain_str)
    except Exception as e:
        print("Couldn't open markov model from file. Error:", e)
        return

    # Get the onset times where we will place notes
    onset_times = get_onset_times(song_file, min_sep=0.1)
    num_before = len(onset_times)
    onset_times = np.delete(onset_times, np.where(onset_times <= start_time))
    print("Removed {} onset times for being before the specified start time".format(num_before - len(onset_times)))
    # If the bpm is not provided then we calculate it ourselves
    if bpm == 0:
        y, samp_rate = librosa.load(song_file)
        bpm = librosa.beat.tempo(y=y, sr=samp_rate)
        print("Got a bpm of {}".format(bpm))
    # Determine the notes we should place
    notes_list = markov_chain.walk()
    while len(notes_list) < len(onset_times):
        notes_list = markov_chain.walk()
    
    # Create dictionary with time key and notes values
    notes_at_times = OrderedDict(zip(onset_times, notes_list))
    notes_as_json = convert_notes_string_to_valid_json(notes_at_times, bpm)
    with open(output_file_path, 'w') as dat_file:
        dat_data = {"_version": "2.2.0",
                    "_customData": {
                        "_time": '',
                        "_BPMChanges": [],
                        "_bookmarks": []
                        },
                    "_events": [],
                    "_notes": notes_as_json,
                    "_obstacles": [],
                    "_waypoints": []
                    }
        json.dump(dat_data, dat_file)
    
    print("Number of notes placed: {}\nNumber of unique note placements: {}\nApprox. notes per second: {}".format(
            len(notes_as_json),
            len(set(notes_list)),
            len(notes_as_json) / np.amax(onset_times)
            )
        )



In [4]:
# Takes in ordered dictonary mapping time to notes string and returns list of json
def convert_notes_string_to_valid_json(notes_at_times, bpm):
    list_of_jsons = []
    for time_point, notes_string in notes_at_times.items():
        notes_list = [int(x) for x in notes_string.split(',')]
        assert len(notes_list) == 16
        # Go over the 4 notes in the list and 
        for note_num in range(4):
            try:
                note_info = notes_list[4 * note_num : 4 * (note_num + 1)]
                if note_info[0] not in [0, 1]: # No note
                    continue
                colour = note_info[0]
                direction = note_info[1]
                row = note_info[2]
                col = note_info[3]
                note_json = {"_time": (time_point / 60) * bpm, # Convert to beat timing
                            "_lineIndex": col,
                            "_lineLayer": row,
                            "_type": colour,
                            "_cutDirection": direction}
                list_of_jsons.append(note_json)
            except Exception as e:
                print(e, "note_num {}, max index {}".format(note_num, 4 * (note_num + 1)))
    return list_of_jsons

In [104]:
# Test our model
song_file_name = "(706a)_Redo_(TV_Size)_ReZero_Opening_-_Konomi_Suzuki"
# (7067)_Sorairo_Days_(TV_Size)_Gurren_Lagann_Opening_-_Shoko_Nakagawa

with ZipFile('../Data_Gather_Filter_Download/Zip_Songs_Data/{}.zip'.format(song_file_name)) as folder:
    folder.extract('song.egg')
    get_map_from_song('song.egg', start_time=0, bpm=190)
    os.remove('song.egg')

Removed 12 onset times for being within 0.1s of the next note
Removed 0 onset times for being before the specified start time
Number of notes placed: 405
Number of unique note placements: 104
Approx. notes per second: 4.475723513279445


In [None]:
# Get a sequence of notes from the HMM and see how they are
num_notes_test = 100
notes_list_test = markov_chain.walk()
while len(notes_list_test) < num_notes_test:
    notes_list_test = markov_chain.walk() # Adds a new note every time