In [4]:
import sys
import os
import numpy as np
import pandas as pd
from collections import Counter 
import json
import pickle
from zipfile import ZipFile

In [4]:
note_types = { # Direction is the direction from which one must cut the note 
    0   : "No Note",
    1   : "Red Up",
    2   : "Red Down",
    3   : "Red Right",
    4   : "Red Left",
    5   : "Red Down-Right",
    6   : "Red Down-Left",
    7   : "Red Up-Right",
    8   : "Red Up-Left",
    9   : "Red No Dir",
    10  : "Blue Up",
    11  : "Blue Down",
    12  : "Blue Right",
    13  : "Blue Left",
    14  : "Blue Down-Right",
    15  : "Blue Down-Left",
    16  : "Blue Up-Right",
    17  : "Blue Up-Left",
    18  : "Blue No Dir",
}

In [5]:
# Compute the note placements in one beat map and return a list of placements
def compute_note_placements(dat_json, file_path):
    # List of all notes, not grouped with notes at same times
    notes_list = dat_json['_notes']
    # List of all unique time points that notes are at
    note_timings = set([note['_time'] for note in notes_list])
    # Dictonary mapping time point to list of notes at that time. Beat saber has 3x4 grid of note positions (=12)
    notes_at_time_point = {note_timing : [0] * 12 for note_timing in note_timings}
    for note in notes_list:
        # 0 - Red, 1 - Blue
        colour = note['_type'] 
        # If it is a bomb then skip as our model doesn't deal with bombs
        if colour not in [0, 1]:
            continue
        # Direction is direction you must cut the note 
        # 0 - Up, 1 - Down, 2 - Right, 3 - Left,
        # 4 - Down-Right, 5 - Down-Left, 6 - Up-Right, 7 - Up-Left
        # 8 - No Direction
        direction = note['_cutDirection']

        # Integer classification based on colour and direction. (Colour * 9 since 9 directions per colour)
        note_type = colour * 9 + direction + 1 # Plus 1 to account for 0 being no note
        
        # Ranges from 0 to 2 (3x4 grid)
        row = note['_lineLayer']
        # Ranges from 0 to 3 (3x4 grid)
        col = note['_lineIndex']
        # Convert grid location to 1D array location
        grid_index = row * 4 + col
        # Prevent mapping and noodle extensions maps from indexing out of bounds (indexes can be negative in these extenstions)
        if abs(grid_index) > 11: 
            continue # These arent actually notes but something else in mapping extensions
        # Update the dictionary with the location and type of note (convert grid to 1D array location)
        try:
            notes_at_time_point[note['_time']][grid_index] = note_type
        except Exception as e:
            print(e, "row {}, col {}, file path {}, note {}".format(row, col, file_path, note))
    
    # Convert to list of tuples for the counter
    placement_list = [tuple(placements) for placements in list(notes_at_time_point.values())]
    # print("Number of unique placements found in song: {}".format(len(set(placement_list))))
    return placement_list


In [174]:
# Compute the most common note placements at time intervals across many beat maps
def compute_most_common_note_placements(maps_df, num_placements, max_maps=7000):
    maps_processed = 0
    # Counter of tuples representing the placement and types of blocks
    total_placements = Counter()
    for file_path in maps_df.file_path.unique():
        if maps_processed > max_maps:
            break
        if file_path != "NOT_FOUND":
            with ZipFile("../Data_Gather_Filter_Download/{}".format(file_path)) as folder:
                filenames = folder.namelist()
                difficulties_dats = [diff for diff in filenames if diff in ["Expert.dat", "ExpertPlus.dat"]]
                for difficulty_dat in difficulties_dats:
                    with folder.open(difficulty_dat) as diff_dat:
                        dat_json = json.load(diff_dat)
                        placement_list = compute_note_placements(dat_json, file_path)
                        total_placements.update(placement_list)
                        maps_processed += 1
    # Done gathering most common placements
    most_common_placements = [placement for placement, count in total_placements.most_common(num_placements)]
    
    # Determine the total number of unique placements we found
    total_unique_placements = len(total_placements)
    print("Total number of unique placements found: {}. ".format(total_unique_placements))

    # Determine what percent of placements we're removing
    total_placements_stored = sum(total_placements.values())
    num_most_common_placements = sum([count for placement, count in total_placements.most_common(num_placements)])
    print("Percentage of placements kept in most common: {}%".format((num_most_common_placements / total_placements_stored) * 100))

    return most_common_placements

In [170]:
# Get maps dataframe from the pickle file
maps_df = pd.read_pickle("../Data_Gather_Filter_Download/downloaded_maps_df.pkl")

In [176]:
#====================================== Settings ======================================#
num_most_commmon_placements = 2000  # Number of placements we will save for later
max_maps_to_process =  16000        # Number of maps to process for the data
#======================================================================================#

# Calculate the n most common note placements
most_common_placements = compute_most_common_note_placements(maps_df, num_most_commmon_placements, max_maps_to_process)

# Insert state of no notes at start since once we start sampling that will be the most common state
most_common_placements.insert(0, [0] * 12)

# Save the most common placements so we can use them as encodings for our model
with open('most_common_placements.pkl', 'wb') as f:
    pickle.dump(most_common_placements, f)

Total number of unique placements found: 12037. 
Percentage of placements kept in most common: 99.44806660987489%


In [179]:
print(len(most_common_placements))
for i, placement in enumerate(most_common_placements[:10]):
    print("\nMost common placement {}".format(i + 1))
    for row in range(2, -1, -1):
        i = row * 4
        print("{:^20s} {:^20s} {:^20s} {:^20s}".format(note_types[placement[i]], note_types[placement[i + 1]], note_types[placement[i + 2]], note_types[placement[i + 3]]))
    

2001

Most common placement 1
      No Note              No Note              No Note              No Note       
      No Note              No Note              No Note              No Note       
      No Note              No Note              No Note              No Note       

Most common placement 2
      No Note              No Note              No Note              No Note       
      No Note              No Note              No Note              No Note       
      No Note              No Note             Blue Down             No Note       

Most common placement 3
      No Note              No Note              No Note              No Note       
      No Note              No Note              No Note              No Note       
      No Note              Red Down             No Note              No Note       

Most common placement 4
      No Note              No Note              No Note              No Note       
      No Note              No Note              No Note