In [1]:
import os

# Move the working directory up one level so that the relative paths used by
# later cells ("xmls/popular/", "./notebooks/word_match_rate.csv") resolve from there.
# Caution: the target is relative, so re-running this cell moves up one more level.
os.chdir("../")

In [2]:
from alm.lyrics import *
from alm.melody import *
from alm.comparator import *
from alm.utils import io
import pprint

In [3]:
# Directory prefix and section file stems for the "popular" set.
# calc_word_match_rate concatenates prefix + stem + ".xml" and prefix + stem + "_TS.xml",
# so the prefix must keep its trailing slash.
# NOTE(review): the _A*/_S* suffixes presumably label sections within a song — confirm.
popular_dir = "xmls/popular/"
popular_songs = [
    "キセキ_A1", "キセキ_A2", "キセキ_S1", "キセキ_S2",
    "愛唄_A1", "愛唄_A2", "愛唄_S1", "愛唄_S2",
    "花唄_A1", "花唄_A2", "花唄_S1", "花唄_S2",
    "遥か_A1", "遥か_A2", "遥か_A3", "遥か_A4", "遥か_S1", "遥か_S2",
    "オレンジ_A1", "オレンジ_A2", "オレンジ_S1", "オレンジ_S2", "オレンジ_S3", "オレンジ_S4",
    "刹那_A", "刹那_S",
    "星影のエール_A", "星影のエール_S",
    "扉_A", "扉_S",
    "歩み_A", "歩み_S",
    "旅立ち_A", "旅立ち_S",
    "涙空_A", "涙空_S",
    "BE_FREE_A", "BE_FREE_S",
]

# Directory prefix and section file stems for the "unpopular" set (same naming scheme).
unpopular_dir = "xmls/unpopular/"
unpopular_songs = [
    "人_A1", "人_A2", "人_S1", "人_S2",
    "beautiful_days_A1", "beautiful_days_A2", "beautiful_days_S1", "beautiful_days_S2",
    "たけてん_A1", "たけてん_A2", "たけてん_S1", "たけてん_S2",
    "ビリーヴ_A1", "ビリーヴ_A2", "ビリーヴ_S1", "ビリーヴ_S2",
    "またね。_A", "またね。_S",
    "ルーキーズ_A", "ルーキーズ_S",
    "君想い_A", "君想い_S",
    "SUN_SHINE!!!_A", "SUN_SHINE!!!_S",
    "サヨナラから始めよう_A", "サヨナラから始めよう_S",
    "no_more_war_A", "no_more_war_S",
    # Disabled entries; the reason for excluding them is not recorded in this notebook.
    # "地球号_A", "地球号_S",
]

In [4]:
def calc_word_match_rate(dir_path: str, songs_list: list, *, word_match_rate_list: list = None):
    """Build one result row per section listed in ``songs_list``.

    For every name in ``songs_list`` the files ``dir_path + name + ".xml"`` and
    ``dir_path + name + "_TS.xml"`` are handed to ``lyrics_extractor.extract_lyrics``
    and ``time_span_tree.time_span_tree_to_dict`` respectively; the resulting word
    list and melody tree are passed to
    ``word_match_rate_calculator.calc_word_match_rate``.

    Args:
        dir_path: Prefix concatenated verbatim in front of each file name, so it
            must already end with a path separator.
        songs_list: Section file stems (without extension).
        word_match_rate_list: Optional list to append the rows to. When given it
            is extended in place and returned; when omitted a new list is
            created for this call.

    Returns:
        A list of rows
        ``[section_name, words_number, match_words_number,
        words_number - match_words_number, match_rate]``.
    """
    # Use a None sentinel instead of a `[]` default: a list literal in the
    # signature is created once and shared by every call, so rows from earlier
    # calls would leak into later results.
    rows = [] if word_match_rate_list is None else word_match_rate_list

    parser = grammar_parser.GrammarParser("ja_ginza")

    for song_name in songs_list:
        mscx_path = dir_path + song_name + ".xml"
        ts_path = dir_path + song_name + "_TS.xml"

        # Lyrics side: parse the lyrics text and attach notes to each word.
        lyrics_notes_map = lyrics_extractor.extract_lyrics(mscx_path)
        doc = parser.parse(lyrics_notes_map[lyrics_extractor.LYRICS_KEY])
        lyrics_tree = parser.to_tree(doc)
        words_notes_map = {}
        # explore_words_in_tree fills words_notes_map in place.
        associating_lyrics_melody.explore_words_in_tree(lyrics_tree, words_notes_map)
        words_list = associating_lyrics_melody.associate_word_list_notes(words_notes_map, lyrics_notes_map)

        # Melody side: load the time-span tree that belongs to the same section.
        melody_tree = time_span_tree.time_span_tree_to_dict(ts_path)

        rate = word_match_rate_calculator.calc_word_match_rate(words_list, melody_tree)
        rate.section_name = song_name

        unmatched_words_number = rate.words_number - rate.match_words_number
        rows.append([
            rate.section_name,
            rate.words_number,
            rate.match_words_number,
            unmatched_words_number,
            rate.match_rate,
        ])

    return rows

In [5]:
# Start from an explicit empty list so that re-running this cell never carries
# rows over from an earlier run; the second call then appends the unpopular
# sections to the same list.
res = calc_word_match_rate(popular_dir, popular_songs, word_match_rate_list=[])
res = calc_word_match_rate(unpopular_dir, unpopular_songs, word_match_rate_list=res)
# Persist the combined rows as CSV (header labels are runtime strings, kept verbatim).
io.output_csv("./notebooks/word_match_rate.csv", ["セクション名", "単語数", "一致した単語数", "一致しなかった単語数", "一致率"], res)
pprint.pprint(res)

[['キセキ_A1', 12, 8, 4, 0.6666666666666666],
 ['キセキ_A2', 13, 8, 5, 0.6153846153846154],
 ['キセキ_S1', 13, 9, 4, 0.6923076923076923],
 ['キセキ_S2', 12, 10, 2, 0.8333333333333334],
 ['愛唄_A1', 18, 11, 7, 0.6111111111111112],
 ['愛唄_A2', 21, 16, 5, 0.7619047619047619],
 ['愛唄_S1', 15, 7, 8, 0.4666666666666667],
 ['愛唄_S2', 14, 7, 7, 0.5],
 ['花唄_A1', 22, 15, 7, 0.6818181818181818],
 ['花唄_A2', 20, 15, 5, 0.75],
 ['花唄_S1', 15, 12, 3, 0.8],
 ['花唄_S2', 14, 12, 2, 0.8571428571428571],
 ['遥か_A1', 6, 4, 2, 0.6666666666666666],
 ['遥か_A2', 11, 5, 6, 0.45454545454545453],
 ['遥か_A3', 12, 11, 1, 0.9166666666666666],
 ['遥か_A4', 10, 7, 3, 0.7],
 ['遥か_S1', 18, 16, 2, 0.8888888888888888],
 ['遥か_S2', 12, 8, 4, 0.6666666666666666],
 ['オレンジ_A1', 11, 10, 1, 0.9090909090909091],
 ['オレンジ_A2', 22, 16, 6, 0.7272727272727273],
 ['オレンジ_S1', 11, 5, 6, 0.45454545454545453],
 ['オレンジ_S2', 15, 11, 4, 0.7333333333333333],
 ['オレンジ_S3', 22, 16, 6, 0.7272727272727273],
 ['オレンジ_S4', 11, 10, 1, 0.9090909090909091],
 ['刹那_A', 24, 18, 6,