In [2]:
import sys
import os
import json
import torch
import math
import importlib.util
import random
from PIL import Image
from matplotlib import pyplot as plt 
from pathlib import Path

In [54]:
# Make the project's helper modules importable.
# Both search-path entries are built from the directory the kernel was started in.
current_working_directory = os.getcwd()
sys.path.extend([
    current_working_directory + '/tools/',
    current_working_directory + '/model/',
])
from ChartRatingBinder import bind_rate, extract
from ChartHandler import bpmTotimeConverter, chartDecomposer, noteTokenizer
from ChartStats import chartStats
from TAMAMo import TokenAlignedMaimaiAnalyzerModel

### This section tokenizes the dataset.

In [97]:
# Input locations: the chart folder and the rating spreadsheet.
folder_path = '../maimai2/'
rating_file = '乐谱.csv'

# Output locations of the two token files written by the tokenization cell.
save_dir_balanced = 'one_file_demo/token[13,15]_balanced.json'
save_dir_sub = 'one_file_demo/token[13.8,14.2].json'

# Sampling plans handed to mix_data. Each entry carries an inclusive rating
# range ('bound') and the replication factor applied to that range ('ratio').
info1 = [
    dict(bound=[13, 13.9], ratio=1),
    dict(bound=[14, 15], ratio=2.5),
]
info2 = [
    dict(bound=[13.8, 13.9], ratio=1),
    dict(bound=[14, 14.2], ratio=1),
]

In [99]:
def mix_data(data, info, padding_up_to, shuffle=True, display_info=False):
    """Tokenize the songs in each rating range and mix them by ratio.

    Args:
        data: indexable collection of song records; each record is read
            through ``song['rating_num']`` and passed to ``noteTokenizer``.
        info: list of dicts, each with ``'bound'`` (``[lower, upper]``, both
            ends inclusive) and ``'ratio'`` (int or float replication factor).
        padding_up_to: forwarded to ``noteTokenizer(...).output(padding_up_to=...)``.
        shuffle: when true (the default), shuffle the mixed list with
            ``random.shuffle`` before returning it; when false, keep the
            range-by-range order.
        display_info: when true, print how many songs were found and how many
            were added for every range.

    Returns:
        A list of tokenizer outputs. For a ratio ``k + f`` (``k`` the integer
        part) every song of the range is added ``k`` times, followed by the
        first ``floor(f * n)`` songs of that range once more.
    """
    output = []
    for i, spec in enumerate(info):
        lower, upper = spec['bound'][0], spec['bound'][1]
        ratio = spec['ratio']

        bucket = []
        for song in data:
            if lower <= song['rating_num'] <= upper:
                # NOTE(review): this passes data[i] (indexed by the position in
                # `info`), not the current `song`, and the result is never
                # used. It looks unintended, but bpmTotimeConverter is imported
                # from another module whose side effects are not visible here,
                # so the call is kept unchanged until that is confirmed.
                converter = bpmTotimeConverter(data[i])
                tokenizer = noteTokenizer(song)
                bucket.append(tokenizer.output(padding_up_to=padding_up_to))

        # Outer double quotes keep these f-strings valid before Python 3.12,
        # where the outer quote character cannot be reused inside the braces.
        if display_info:
            print(f"Number of songs with in [{lower}, {upper}] is {len(bucket)}")

        whole_part = math.floor(ratio)
        partial = math.floor((ratio - whole_part) * len(bucket))
        output += bucket * whole_part
        output += bucket[:partial]

        if display_info:
            print(f"Number of songs added within [{lower}, {upper}] is {len(bucket) * whole_part + partial}")

    # Honour the `shuffle` flag; previously the list was shuffled unconditionally.
    if shuffle:
        random.shuffle(output)
    return output

In [107]:
# Tokenization
# Length passed as `padding_up_to` to both mix_data calls below.
TOKEN_PADDING = 2200

# Step 1: bind ratings to the charts, then decompose every difficulty entry of every song.
rating = bind_rate(rating_file)
dataset = extract(folder_path, rating)
processed_data = []
print('Start extracting and binding rating with charts and primarily decompose the charts...')
for song in dataset:
    for diff in song['difficulty']:
        fullchart = song['difficulty'][diff]
        current = chartDecomposer()
        current.decompose(fullchart, song['name'])
        this_song_info = current.output_data()
        processed_data.append(this_song_info)
print('Extracting, binding and decomposing are done.')

# Step 2: build the two token sets from the same decomposed charts.
print('Start further handling the charts...')
balanced_data = mix_data(processed_data, info1, TOKEN_PADDING, display_info=True)
sub_data = mix_data(processed_data, info2, TOKEN_PADDING, display_info=True)
print('Tokenization success')

# Step 3: write both token sets as JSON. The `with` blocks close the files,
# so no explicit close() call is needed afterwards.
print('Start saving token files...')
with open(save_dir_balanced, 'w') as fp:
    json.dump(balanced_data, fp)
with open(save_dir_sub, 'w') as fp:
    json.dump(sub_data, fp)

Start extracting and binding rating with charts and primarily decompose the charts...
Extracting, binding and decomposing are done.
Start further handling the charts...
Number of songs with in [13, 13.9] is 595
Number of songs added within [13, 13.9] is 595
Number of songs with in [14, 15] is 209
Number of songs added within [14, 15] is 522
Number of songs with in [13.8, 13.9] is 130
Number of songs added within [13.8, 13.9] is 130
Number of songs with in [14, 14.2] is 83
Number of songs added within [14, 14.2] is 83
Tokenization success
Start saving token files...


### This section is the first part of training.

In [111]:
# It will do training, so you need an Nvidia GPU or it will be super super slow.
# I have no idea on how it works on Mac since I never use Macintosh system.
# If you don't have access to an Nvidia GPU, please skip this part. A pretrained parameter file is already included.
!python tools\train.py --config configs\model_class_bound[14]_data[13,15]_h3_l3_balance.py --device cuda --save_dir one_file_demo/first_train/final.pth --valid True --lossplot True --dataset one_file_demo\token[13,15]_balanced.json

Start loading dataset...
Dataset loaded. 1117 samples in total.
Start training process...
Epoch [1/72], Train_loss: 0.6883, Valid_loss: 0.6814, Learning Rate: 0.001 at 2024-12-03 17:36:37.964317
Epoch [2/72], Train_loss: 0.6733, Valid_loss: 0.6618, Learning Rate: 0.001 at 2024-12-03 17:36:56.331850
Epoch [3/72], Train_loss: 0.6476, Valid_loss: 0.6056, Learning Rate: 0.001 at 2024-12-03 17:37:14.440605
Epoch [4/72], Train_loss: 0.5760, Valid_loss: 0.5519, Learning Rate: 0.001 at 2024-12-03 17:37:31.917190
Epoch [5/72], Train_loss: 0.5091, Valid_loss: 0.4987, Learning Rate: 0.001 at 2024-12-03 17:37:49.876275
Epoch [6/72], Train_loss: 0.4782, Valid_loss: 0.4426, Learning Rate: 0.001 at 2024-12-03 17:38:10.760964
Epoch [7/72], Train_loss: 0.4637, Valid_loss: 0.4302, Learning Rate: 0.001 at 2024-12-03 17:38:30.353655
Epoch [8/72], Train_loss: 0.4595, Valid_loss: 0.4465, Learning Rate: 0.001 at 2024-12-03 17:38:50.765001
Epoch [9/72], Train_loss: 0.4666, Valid_loss: 0.4611, Learning Rate: 0

In [4]:
# Open the loss plot of the first training stage (presumably written by the
# training run with --lossplot True -- confirm against tools/train.py).
# A forward-slash path is used because the backslash form only resolves on Windows.
image = Image.open('one_file_demo/first_train_loss.png')

# Leave the image as the cell's last expression so Jupyter renders it inline,
# instead of image.show(), which hands the picture to an external viewer.
image

### This section is the second part of training.

In [8]:
# This also does training.
# Only execute it if you have an Nvidia GPU or you just want to waste your time.
# Again, no idea if this works or not on Mac!!!!!!!!!!
!python tools\train.py --config configs/model_class_[13.8,14.2]_r[130,80]_from_bound[14]_data[13,15]_h3_l3_balance.py --device cuda --save_dir one_file_demo/second_train/final.pth --valid True --lossplot True --dataset one_file_demo\token[13.8,14.2].json


Start loading dataset...
Dataset loaded. 213 samples in total.
Start training process...
Epoch [1/72], Train_loss: 1.4360, Valid_loss: 1.5477, Learning Rate: 0.0001 at 2024-12-03 18:57:59.629124
Epoch [2/72], Train_loss: 1.4000, Valid_loss: 1.5059, Learning Rate: 0.0001 at 2024-12-03 18:58:03.256420
Epoch [3/72], Train_loss: 1.3636, Valid_loss: 1.4657, Learning Rate: 0.0001 at 2024-12-03 18:58:06.953336
Epoch [4/72], Train_loss: 1.3284, Valid_loss: 1.4228, Learning Rate: 0.0001 at 2024-12-03 18:58:10.647365
Epoch [5/72], Train_loss: 1.2880, Valid_loss: 1.3446, Learning Rate: 0.0001 at 2024-12-03 18:58:14.349222
Epoch [6/72], Train_loss: 1.2045, Valid_loss: 1.1833, Learning Rate: 0.0001 at 2024-12-03 18:58:18.082035
Epoch [7/72], Train_loss: 1.1252, Valid_loss: 1.1263, Learning Rate: 0.0001 at 2024-12-03 18:58:21.761068
Epoch [8/72], Train_loss: 1.0927, Valid_loss: 1.0983, Learning Rate: 0.0001 at 2024-12-03 18:58:25.407713
Epoch [9/72], Train_loss: 1.0726, Valid_loss: 1.0745, Learning 

In [12]:
# Open the loss plot of the second training stage (presumably written by the
# training run with --lossplot True -- confirm against tools/train.py).
# A forward-slash path is used because the backslash form only resolves on Windows.
image = Image.open('one_file_demo/second_train_loss.png')

# Leave the image as the cell's last expression so Jupyter renders it inline,
# instead of image.show(), which hands the picture to an external viewer.
image

### Evaluation Part
The pretrained parameter file is used for this evaluation.

In [18]:
# It takes time when ytou use CPU.
!python tools\test.py --config configs/model_class_[13.8,14.2]_r[130,80]_from_bound[14]_data[13,15]_h3_l3_balance.py --device cuda --std tokens_[13.8,14.2]_r[130,83].json --checkpoint checkpoints\model_class_[13.8,14.2]_r[130,83]_from_bound[14]_data[13,15]_h3_l3_balance_checkpoint.pth --dist \data

Start loading dataset...
Dataset loaded. 213 samples in total.
# of 13: 130
# of 14: 83
train_acc: 76.47%
valid_acc: 75.12%
acc: 75.12%
f1: 78.88%
positive_correct_rate: 76.15%
neagtive_correct_rate: 73.49%
true_positive: 99
true_negative: 61
false_positive: 22
false_negative: 31
Probability graph plotted and saved.


In [20]:
# Open the probability distribution plot (presumably the graph the evaluation
# run reports as "plotted and saved" -- confirm against tools/test.py).
# A forward-slash path is used because the backslash form only resolves on Windows.
image = Image.open('data/distribution.png')

# Leave the image as the cell's last expression so Jupyter renders it inline,
# instead of image.show(), which hands the picture to an external viewer.
image