# Rhyme Highlighter
### A project to highlight rap lyrics by syllable to visualize the rhyme schemes used by various rappers in various songs. Part of a larger project to analyze multiple aspects of Hip Hop music.

## Modules

In [1]:
import json
from random import choice, sample

import requests
import pandas as pd
import numpy as np
import re

In [2]:
# Importing NLTK related modules
import nltk
import nltk.corpus
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Yann\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Yann\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [3]:
# Importing Spacy related modules
import spacy
nlp = spacy.load('en_core_web_sm')

## Constants and Variables

In [4]:
# For Words API

ROOT_URL = "https://wordsapiv1.p.rapidapi.com/words/"
HEADER = {"X-RapidAPI-Key": "e180762d6amsh4110215b5d1eaa2p1c86bbjsn333829e4ac18",
         "X-RapidAPI-Host": "wordsapiv1.p.rapidapi.com"}

In [5]:
# NLTK Related
stopwords = stopwords.words('english')

In [6]:
# Rhyme Related
primary_phonemes = ['AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0', 'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2']
no_stress_phonemes = ['AY', 'AE', 'IY', 'IH', 'AH', 'AA', 'EH', 'EY', 'OW', 'ER', 'AO', 'UW', 'UH', 'AW', 'OY']
rhyme_dict = nltk.corpus.cmudict.dict()
entries = nltk.corpus.cmudict.entries()

## Objects

### Object to manage the Rhyme and Syllable dictionaries

In [7]:
class PronDict:
    
    def __init__(self):
        # Initializing the cmudict dictionary for pronunciations
        self.rhyme_dict = nltk.corpus.cmudict.dict()
        self.entries = nltk.corpus.cmudict.entries()
        self.syllables = {}
        self.additions()
        #self.syllables = self.read_syllables_file()
        #self.update()
    
    #def update(self):
    #    with open('pron_dict_extension.json', 'r+') as file:
    
    def add_pron(self, word, pron):
        self.rhyme_dict[word] = [pron]
        #with open("rhyme_dict_extension.json", 'a+') as file:
        #    json.dump({word: pron}, file)
        #    file.write("\n")
    
    def add_syllables(self, word, syllables):
        if isinstance(syllables, list) and isinstance(word, str):
            syllable_dict = {'count': len(syllables), 'list': syllables}
            word_syllable_dict = {word: {'count': len(syllables), 'list': syllables}}
            self.syllables[word] = syllable_dict
            #with open("syllables.json", "a+") as file:
            #    json.dump(word_syllable_dict, file)
            #    file.write("\n")
        else:
            raise TypeError("Word value must be a string, and syllables value must a list")
    
    #def read_syllables_file(self):
    #    with open("syllables.json", 'r+') as file:
    #        return json.load(file)
    
    def additions(self):
        self.add_syllables("into", ['in', 'to'])
        self.add_pron("I", ['AY1'])
        self.add_pron("sigil", ['S', 'IH1', 'JH', 'AH0', 'L'])

### Object to manage a word to turn into a rhyme

In [8]:
class Rhyme:
    
    def __init__(self, word):
        self.word = word
        self.word_info = self.get_word_info(self.word)
        self.phonemes = self.get_phonemes(self.word)
        self.syllables = self.get_syllables(self.word_info)
        self.phoneme_dict = self.get_phoneme_syllable_dict()
        self.colored_html = self.colorize_to_html()
        
    def get_word_info(self, word):
        if isinstance(word, str): 
            #word = lemmatize(word)
            return json.loads(requests.get(ROOT_URL + word + '/', headers=HEADER).text)
        else:
            raise TypeError("Value must be a string.") 
            
    def get_syllables(self, word_response):
        if self.phonemes != None and len(self.get_phonemes_to_color(self.phonemes)) > 1:
            try:
                return word_response['syllables']
            except KeyError:
                try:
                    return dictionary.syllables[self.word]
                except KeyError:
                    print("Unable to get syllables for:" + self.word)
        else:
            return {'count': 1, 'list': [self.word]}
            
    def get_phonemes(self, word):
        try:
            return rhyme_dict[word][0]
        except KeyError as e:
            try:
                return dictionary.rhyme_dict[word][0]
            except:  
                print("Could not find phonemes for:" + word)
                return None
        
    def get_phonemes_to_color(self, phonemes):
        return [phoneme[:2] for phoneme in phonemes if len(phoneme) > 2]
    
    def get_phoneme_syllable_dict(self):
        phonemes_to_color = self.get_phonemes_to_color(self.phonemes)
        syllables_to_color = self.syllables['list']
        if self.syllables['count'] == len(phonemes_to_color):
            return dict(zip(syllables_to_color, phonemes_to_color))
        else:
            raise IndexError
    
    def colorize_to_html(self):
        line_list = []
        for syllable in self.phoneme_dict:
            line_list.append(f'<h1 style="background-color:{colored_phonemes[self.phoneme_dict[syllable]]}; width:3.25em; display:inline">{syllable}</h1>')
        line_list.append("<h1 style='display:inline'>&nbsp;&nbsp;&nbsp;</h1>")
        return ''.join(line_list)

## Phoneme Colors

In [9]:
color_df = pd.read_html("https://www.ditig.com/256-colors-cheat-sheet")[0]

In [10]:
hex_colors = list(color_df.HEX[color_df["Xterm Number"] < 231])
hex_sample = sample(hex_colors, len(no_stress_phonemes))
sorted_hex = sorted(hex_sample)

In [11]:
colored_phonemes = dict(zip(no_stress_phonemes, hex_sample))

## Set up for testing

In [12]:
dictionary = PronDict()

#### Adding word pronunciations and syllables that are not part of the CMU Dictionary and WordAPI database

In [13]:
dictionary.add_pron("sigil", ['S', 'IH1', 'JH', 'AH0', 'L'])
dictionary.add_syllables("radley", ['rad', 'ley'])
dictionary.add_pron("naptime", ['N', 'AE1', 'P', 'T', 'AY1', 'M'])
dictionary.add_syllables("naptime", ['nap', 'time'])
dictionary.add_syllables("every", ['e', 'ver', 'y'])
dictionary.add_syllables("louise", ['lou', 'ise'])
dictionary.add_pron("moleskin", ['M', 'OW1', 'L', 'S', 'K', 'IH1', 'N'])
dictionary.add_pron("woah", ['W', 'OW1'])
dictionary.add_syllables("tv", ['t', 'v'])
dictionary.add_syllables("jumping", ['jump', 'ing'])
dictionary.add_syllables("expressed", ['ex', 'pressed'])
dictionary.add_syllables("myself", ['my', 'self'])
dictionary.add_syllables("feelings", ['fee', 'lings'])
dictionary.add_syllables("momma", ['mom', 'ma'])
dictionary.add_syllables("everybody", ['e', 'very', 'bo', 'dy'])
dictionary.add_syllables("yourself", ['your', 'self'])
dictionary.add_syllables("gangsta", ['gang', 'sta'])
dictionary.add_syllables("everybody", ['e', 'very', 'thing'])
dictionary.add_syllables("without", ['with', 'out'])
dictionary.add_syllables("women", ['wo', 'men'])
dictionary.add_syllables("huckabee", ['huck', 'a', 'bee'])
dictionary.add_pron("fuckery", ['F', 'AH1', 'K', 'ER0', 'IY0'])
dictionary.add_syllables("fuckery", ['fuck', 'er', 'y'])
dictionary.add_pron("waterwings", ['W', 'AO1', 'T', 'ER0', 'W', 'IH1', 'NG', 'Z'])
dictionary.add_syllables("waterwings", ['wa', 'ter', 'wings'])

## Testing Verses

In [14]:
# Verse text to rhyme
boot_soup_v3 = '''Meanwhile back at the brackish<br>
Half-man, patch-in, splash with the hapless<br>
Tag every hack in the badlands<br>
Spoon-gagged, amass the too trashy<br>
Ragtag, rad to the last Boo Radley<br>
Bad back and limbs that don't act right<br>
Still flap with the bats in the black light<br>
Architect of hard luck, turn love to a crash site<br>
After lunch, before naptime<br>
Z-Z-Z-Z, jaws on floors<br>
A two-bit tour de force, I'm all yours<br>
Rabid and raised to rig the game booth<br>
I lick the plate clean, I lick the blade too, look<br>
Ten beepers, even the freak elite<br>
Fall back when the creature leap<br>
His U.F.O. throw shade sea to sea<br>
Jeez Louise, an omen to read and weep<br>
The road been brain for the hunt, nose for the bone broth<br>
Cold shoulder programmed to stonewall<br>
Old dog, I'm known to cold know all<br>
Don't wait 'til the whole shit snowball<br>
One peek at the code in the moleskin<br>
Might just render you part of the potion, woah<br>
Never you mind the moth eggs<br>
I'm warm and the TV is on always'''
# Text Cleaning
boot_soup_v3 = re.sub(r'[!?,;."\n()]', '', boot_soup_v3)
boot_soup_v3 = re.sub(r'[-:]', ' ', boot_soup_v3)
boot_soup_lines = boot_soup_v3.lower().split("<br>")

In [15]:
# Some helper functions

def rhyme_that_line(bar_to_rhyme):
    words_to_rhyme = bar_to_rhyme.split(' ')
    rhymed_line = ""
    for word in words_to_rhyme:
        try:
            rhyme = Rhyme(word)
            rhymed_line += rhyme.colored_html
        except:
            rhymed_line += f'<h1 width:3.25em; display:inline">{word}</h1>'
            print(word)
    with open("colored_phonemes.html", 'a') as file:
        file.write(rhymed_line)
        file.write("\n")
        
def reset_file(filepath):
    with open(filepath, 'r+') as file:
        file.truncate(0)

#### Generating the Highlighted Rhymes

In [16]:
reset_file("colored_phonemes.html")

for line in boot_soup_lines:
    rhyme_that_line(line)

every
Could not find phonemes for:ufo
ufo
