In [3]:
import pandas as pd
import numpy as np
import pronouncing as pr
from nltk.tokenize import word_tokenize
from collections import OrderedDict
from syllabify.syllabify import syllabify as syl
from nltk.corpus import cmudict

In [4]:
# Reading file line by line

content = []
with open('lyrics/forgot.md') as f:
    for line in f.readlines():
        content.append(word_tokenize(line.lower().strip()))
    
print content

[['forgotten', 'spot', 'in', 'the', 'caribbean']]


In [3]:
type(content)

list

In [5]:
line = 'Forgotten spot in the Caribbean'.lower()
print line

forgotten spot in the caribbean


In [4]:
lines = line.splitlines()
print lines

['forgotten spot in the caribbean']


In [5]:
# this might not be neccessary. Get back to this when the scoring 
# mechanism has been been implemented.

from nltk.tokenize import word_tokenize

tokens = []
for sent in lines:
    tokens.append(word_tokenize(sent))
print tokens

[['forgotten', 'spot', 'in', 'the', 'caribbean']]


In [6]:
from nltk.corpus.reader.util import *
from nltk.corpus.reader.api import *
from nltk.corpus import cmudict

In [7]:
# option one: returning the words in tupel form with word and aphabet.

from nltk.corpus import cmudict

def word_aphabet(string):

    aphabet = cmudict.dict()
    output = []
    for i, word in enumerate(string):
        try:
            output.append((word, aphabet[word][0]))
        except Exception as e:
            print e
    return output

output = word_aphabet(content[0])
print output

[('forgotten', [u'F', u'ER0', u'G', u'AA1', u'T', u'AH0', u'N']), ('spot', [u'S', u'P', u'AA1', u'T']), ('in', [u'IH0', u'N']), ('the', [u'DH', u'AH0']), ('caribbean', [u'K', u'ER0', u'IH1', u'B', u'IY0', u'AH0', u'N'])]


In [8]:
# accessing the the data 
for l in output:
    print l[0]
#     print l[1][1]
    print l[1]

forgotten
[u'F', u'ER0', u'G', u'AA1', u'T', u'AH0', u'N']
spot
[u'S', u'P', u'AA1', u'T']
in
[u'IH0', u'N']
the
[u'DH', u'AH0']
caribbean
[u'K', u'ER0', u'IH1', u'B', u'IY0', u'AH0', u'N']


In [9]:
from nltk.corpus import PlaintextCorpusReader

In [10]:
# corpus_root = '/home/yoyo/Desktop'
# wordlists = PlaintextCorpusReader(corpus_root, 'mini.txt')
# wordlists.words()

# test = word_aphabet(wordlists.words())
# print test[2]

In [11]:
line = pr.phones_for_word('forgotten')[0].split()
length = [len(ch) for ch in line]
print line, length

[u'F', u'ER0', u'G', u'AA1', u'T', u'AH0', u'N'] [1, 3, 1, 3, 1, 3, 1]


In [15]:
lst = []
for ind, pho in enumerate(line):
    if len(pho) > 1:
        lst.append(line[ind-1:ind+1])
    else:
        continue

print lst

[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0']]


In [16]:
lst = []
for ind, pho in enumerate(line):
    if len(pho) > 1 and pho[-1] == 0:
        lst.append(line[ind-1:ind+1])
    else:
        continue

print lst

[]


In [17]:
line[-1] in lst

False

In [18]:
line2 = pr.phones_for_word('caribbean')[0].split()
length = [len(ch) for ch in line]
print line2, length

[u'K', u'EH0', u'R', u'IH1', u'B', u'IY2', u'AH0', u'N'] [1, 3, 1, 3, 1, 3, 1]


In [19]:
# lst = []
# for ind, pho in enumerate(line):
#     if len(pho) > 1 and pho[-1].isdigit():
#         if int(pho[-1]) > 0:
#             lst.append(line[ind-1:ind])
#         else:
#             lst.append(line[ind-1:ind+1])
#     else:
#         continue

# print lst

In [8]:
#!/usr/bin/env python
# Copyright (c) 2012-2013 Kyle Gorman <gormanky@ohsu.edu>
# 
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the 
# "Software"), to deal in the Software without restriction, including 
# without limitation the rights to use, copy, modify, merge, publish, 
# distribute, sublicense, and/or sell copies of the Software, and to 
# permit persons to whom the Software is furnished to do so, subject to 
# the following conditions:
# 
# The above copyright notice and this permission notice shall be included 
# in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# 
# syllabify.py: prosodic parsing of ARPABET entries

from itertools import chain

## constants
SLAX   = {'IH1', 'IH2', 'EH1', 'EH2', 'AE1', 'AE2', 'AH1', 'AH2', 
                                                    'UH1', 'UH2',}
VOWELS = {'IY1', 'IY2', 'IY0', 'EY1', 'EY2', 'EY0', 'AA1', 'AA2', 'AA0',
          'ER1', 'ER2', 'ER0', 'AW1', 'AW2', 'AW0', 'AO1', 'AO2', 'AO0',
          'AY1', 'AY2', 'AY0', 'OW1', 'OW2', 'OW0', 'OY1', 'OY2', 'OY0',
          'IH0', 'EH0', 'AE0', 'AH0', 'UH0', 'UW1', 'UW2', 'UW0', 'UW',
          'IY',  'EY',  'AA',  'ER',   'AW', 'AO',  'AY',  'OW',  'OY',  
          'UH',  'IH',  'EH',  'AE',  'AH',  'UH',} | SLAX

## licit medial onsets

O2 = {('P', 'R'), ('T', 'R'), ('K', 'R'), ('B', 'R'), ('D', 'R'),
      ('G', 'R'), ('F', 'R'), ('TH', 'R'), 
      ('P', 'L'), ('K', 'L'), ('B', 'L'), ('G', 'L'), 
      ('F', 'L'), ('S', 'L'),
      ('K', 'W'), ('G', 'W'), ('S', 'W'),
      ('S', 'P'), ('S', 'T'), ('S', 'K'),
      ('HH', 'Y'), # "clerihew"
      ('R', 'W'),}
O3 = {('S', 'T', 'R'), ('S', 'K', 'L'), ('T', 'R', 'W')} # "octroi"

# This does not represent anything like a complete list of onsets, but 
# merely those that need to be maximized in medial position.

def syllabify(pron, alaska_rule=True):
    """
    Syllabifies a CMU dictionary (ARPABET) word string

    # Alaska rule:
    >>> pprint(syllabify('AH0 L AE1 S K AH0'.split())) # Alaska
    '-AH0-.L-AE1-S.K-AH0-'
    >>> pprint(syllabify('AH0 L AE1 S K AH0'.split(), 0)) # Alaska
    '-AH0-.L-AE1-.S K-AH0-'

    # huge medial onsets:
    >>> pprint(syllabify('M IH1 N S T R AH0 L'.split())) # minstrel
    'M-IH1-N.S T R-AH0-L'
    >>> pprint(syllabify('AA1  K T R W AA0 R'.split())) # octroi
    '-AA1-K.T R W-AA0-R'

    # destressing
    >>> pprint(destress(syllabify('M IH1 L AH0 T EH2 R IY0'.split())))
    'M-IH-.L-AH-.T-EH-.R-IY-'

    # normal treatment of 'j':
    >>> pprint(syllabify('M EH1 N Y UW0'.split())) # menu
    'M-EH1-N.Y-UW0-'
    >>> pprint(syllabify('S P AE1 N Y AH0 L'.split())) # spaniel
    'S P-AE1-N.Y-AH0-L'
    >>> pprint(syllabify('K AE1 N Y AH0 N'.split())) # canyon
    'K-AE1-N.Y-AH0-N'
    >>> pprint(syllabify('M IH0 N Y UW2 EH1 T'.split())) # minuet
    'M-IH0-N.Y-UW2-.-EH1-T'
    >>> pprint(syllabify('JH UW1 N Y ER0'.split())) # junior
    'JH-UW1-N.Y-ER0-'
    >>> pprint(syllabify('K L EH R IH HH Y UW'.split())) # clerihew
    'K L-EH-.R-IH-.HH Y-UW-'

    # nuclear treatment of 'j'
    >>> pprint(syllabify('R EH1 S K Y UW0'.split())) # rescue
    'R-EH1-S.K-Y UW0-'
    >>> pprint(syllabify('T R IH1 B Y UW0 T'.split())) # tribute
    'T R-IH1-B.Y-UW0-T'
    >>> pprint(syllabify('N EH1 B Y AH0 L AH0'.split())) # nebula
    'N-EH1-B.Y-AH0-.L-AH0-'
    >>> pprint(syllabify('S P AE1 CH UH0 L AH0'.split())) # spatula
    'S P-AE1-.CH-UH0-.L-AH0-'
    >>> pprint(syllabify('AH0 K Y UW1 M AH0 N'.split())) # acumen
    '-AH0-K.Y-UW1-.M-AH0-N'
    >>> pprint(syllabify('S AH1 K Y AH0 L IH0 N T'.split())) # succulent
    'S-AH1-K.Y-AH0-.L-IH0-N T'
    >>> pprint(syllabify('F AO1 R M Y AH0 L AH0'.split())) # formula
    'F-AO1 R-M.Y-AH0-.L-AH0-'
    >>> pprint(syllabify('V AE1 L Y UW0'.split())) # value
    'V-AE1-L.Y-UW0-'

    # everything else
    >>> pprint(syllabify('N AO0 S T AE1 L JH IH0 K'.split())) # nostalgic
    'N-AO0-.S T-AE1-L.JH-IH0-K'
    >>> pprint(syllabify('CH ER1 CH M AH0 N'.split())) # churchmen
    'CH-ER1-CH.M-AH0-N'
    >>> pprint(syllabify('K AA1 M P AH0 N S EY2 T'.split())) # compensate
    'K-AA1-M.P-AH0-N.S-EY2-T'
    >>> pprint(syllabify('IH0 N S EH1 N S'.split())) # inCENSE
    '-IH0-N.S-EH1-N S'
    >>> pprint(syllabify('IH1 N S EH2 N S'.split())) # INcense
    '-IH1-N.S-EH2-N S'
    >>> pprint(syllabify('AH0 S EH1 N D'.split())) # ascend
    '-AH0-.S-EH1-N D'
    >>> pprint(syllabify('R OW1 T EY2 T'.split())) # rotate
    'R-OW1-.T-EY2-T'
    >>> pprint(syllabify('AA1 R T AH0 S T'.split())) # artist
    '-AA1 R-.T-AH0-S T'
    >>> pprint(syllabify('AE1 K T ER0'.split())) # actor
    '-AE1-K.T-ER0-'
    >>> pprint(syllabify('P L AE1 S T ER0'.split())) # plaster
    'P L-AE1-S.T-ER0-'
    >>> pprint(syllabify('B AH1 T ER0'.split())) # butter
    'B-AH1-.T-ER0-'
    >>> pprint(syllabify('K AE1 M AH0 L'.split())) # camel
    'K-AE1-.M-AH0-L'
    >>> pprint(syllabify('AH1 P ER0'.split())) # upper
    '-AH1-.P-ER0-'
    >>> pprint(syllabify('B AH0 L UW1 N'.split())) # balloon
    'B-AH0-.L-UW1-N'
    >>> pprint(syllabify('P R OW0 K L EY1 M'.split())) # proclaim
    'P R-OW0-.K L-EY1-M'
    >>> pprint(syllabify('IH0 N S EY1 N'.split())) # insane
    '-IH0-N.S-EY1-N'
    >>> pprint(syllabify('IH0 K S K L UW1 D'.split())) # exclude
    '-IH0-K.S K L-UW1-D'
    """
    ## main pass
    mypron = list(pron)
    nuclei = []
    onsets = []
    i = -1
    for (j, seg) in enumerate(mypron):
        if seg in VOWELS:
            nuclei.append([seg])
            onsets.append(mypron[i + 1:j]) # actually interludes, r.n.
            i = j                        
    codas = [mypron[i + 1:]]
    ## resolve disputes and compute coda
    for i in xrange(1, len(onsets)):
        coda = []
        # boundary cases
        if len(onsets[i]) > 1 and onsets[i][0] == 'R':
            nuclei[i - 1].append(onsets[i].pop(0))
        if len(onsets[i]) > 2 and onsets[i][-1] == 'Y':
            nuclei[i].insert(0, onsets[i].pop())
        if len(onsets[i]) > 1 and alaska_rule and nuclei[i-1][-1] in SLAX \
                                              and onsets[i][0] == 'S':
            coda.append(onsets[i].pop(0))
        # onset maximization
        depth = 1
        if len(onsets[i]) > 1:
            if tuple(onsets[i][-2:]) in O2:
                depth = 3 if tuple(onsets[i][-3:]) in O3 else 2
        for j in xrange(len(onsets[i]) - depth):
            coda.append(onsets[i].pop(0))
        # store coda
        codas.insert(i - 1, coda)

    ## verify that all segments are included in the ouput
    output = zip(onsets, nuclei, codas)
    flat_output = list(chain.from_iterable(chain.from_iterable(output)))
    if flat_output != mypron:
        raise ValueError("could not syllabify {}, got {}".format(mypron, 
                                                           flat_output))
    return output


def pprint(syllab):
    """
    Pretty-print a syllabification
    """
    return '.'.join('-'.join(' '.join(p) for p in syl) for syl in syllab)


def destress(syllab):
    """
    Generate a syllabification with nuclear stress information removed
    """
    syls = []
    for (onset, nucleus, coda) in syllab:
        nuke = [p[:-1] if p[-1] in {'0', '1', '2'} else p for p in nucleus]
        syls.append((onset, nuke, coda))
    return syls


# if __name__ == '__main__':
#     import doctest
#     doctest.testmod()

In [10]:
text = 'forgotten spot in the caribbean'.split()
text

['forgotten', 'spot', 'in', 'the', 'caribbean']

In [11]:
# Revisit for multiple line updates

word_phonetic = OrderedDict()
for word in text:
    word_phonetic.update({word:pr.phones_for_word(word)[0]})

word_phonetic

OrderedDict([('forgotten', u'F ER0 G AA1 T AH0 N'),
             ('spot', u'S P AA1 T'),
             ('in', u'IH0 N'),
             ('the', u'DH AH0'),
             ('caribbean', u'K EH0 R IH1 B IY2 AH0 N')])

In [12]:
syll = OrderedDict()
for key, val in word_phonetic.iteritems():
    check = syllabify(val.split())
#     temp = []
#     for sound in check:
#         check += sound
    syll.update({key:check})

print syll

OrderedDict([('forgotten', [([u'F'], [u'ER0'], []), ([u'G'], [u'AA1'], []), ([u'T'], [u'AH0'], [u'N'])]), ('spot', [([u'S', u'P'], [u'AA1'], [u'T'])]), ('in', [([], [u'IH0'], [u'N'])]), ('the', [([u'DH'], [u'AH0'], [])]), ('caribbean', [([u'K'], [u'EH0'], []), ([u'R'], [u'IH1'], []), ([u'B'], [u'IY2'], []), ([], [u'AH0'], [u'N'])])])


In [13]:
print syll.values()

[[([u'F'], [u'ER0'], []), ([u'G'], [u'AA1'], []), ([u'T'], [u'AH0'], [u'N'])], [([u'S', u'P'], [u'AA1'], [u'T'])], [([], [u'IH0'], [u'N'])], [([u'DH'], [u'AH0'], [])], [([u'K'], [u'EH0'], []), ([u'R'], [u'IH1'], []), ([u'B'], [u'IY2'], []), ([], [u'AH0'], [u'N'])]]


In [28]:
check = OrderedDict()
for key, val in syll.iteritems():
    print val
    for sound in val:
        temp = []
        for a in sound:
            temp += a
#         print temp
    check.update({key:temp})

print check.keys()

[([u'F'], [u'ER0'], []), ([u'G'], [u'AA1'], []), ([u'T'], [u'AH0'], [u'N'])]
[([u'S', u'P'], [u'AA1'], [u'T'])]
[([], [u'IH0'], [u'N'])]
[([u'DH'], [u'AH0'], [])]
[([u'K'], [u'EH0'], []), ([u'R'], [u'IH1'], []), ([u'B'], [u'IY2'], []), ([], [u'AH0'], [u'N'])]
['forgotten', 'spot', 'in', 'the', 'caribbean']


In [27]:
check = OrderedDict()
for key, val in syll.iteritems():
    temp2 = []
    for l in val:
        temp = []
        for b in l:
            temp += b
        temp2.append(temp)
    check.update({key:temp2})

print check.values()


[[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']], [[u'S', u'P', u'AA1', u'T']], [[u'IH0', u'N']], [[u'DH', u'AH0']], [[u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']]]


In [29]:
for k, v in check.iteritems():
    print k, v

forgotten [u'T', u'AH0', u'N']
spot [u'S', u'P', u'AA1', u'T']
in [u'IH0', u'N']
the [u'DH', u'AH0']
caribbean [u'AH0', u'N']


In [17]:
def score_size(check):
    size = 0
    for word in check.values():
        size += len(word)
    return np.zeros((size,size))

m = score_size(check)
print m.shape

(10, 10)


In [18]:
def similarity_mat(check):
    temp = check.values()
    score = score_size(check)
#     print score, '\n'
    col = []
    row = []
    for word1 in check.values():
        col_ind = 0
        for syl1 in word1:
            col.append(syl1)
            row_ind = 0
            for word2 in temp:
                if word1 != word2:
                    for syl2 in word2:
                        row.append(syl2)
                        score[row_ind][col_ind] = 1
                else:
                    for syl2 in word2:
                        row.append(syl2)
                        score[row_ind][col_ind] = -1
            row_ind += 1
        col_ind +=1
    return score, col, row

a, b, c = similarity_mat(check)
print a

[[-1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]


In [19]:
check.values()

[[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']],
 [[u'S', u'P', u'AA1', u'T']],
 [[u'IH0', u'N']],
 [[u'DH', u'AH0']],
 [[u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']]]

In [20]:
syls=[]
for val in check.itervalues():
    for syl in val:
        syls.append(syl)
        
# for i, syl1 in enumerate(syls):
#     for j , syl2 in enumer   syls:
#         score(syl1,syl2)

In [37]:
syls

[[u'F', u'ER0'],
 [u'G', u'AA1'],
 [u'T', u'AH0', u'N'],
 [u'S', u'P', u'AA1', u'T'],
 [u'IH0', u'N'],
 [u'DH', u'AH0'],
 [u'K', u'EH0'],
 [u'R', u'IH1'],
 [u'B', u'IY2'],
 [u'AH0', u'N']]

In [21]:
check.values()

[[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']],
 [[u'S', u'P', u'AA1', u'T']],
 [[u'IH0', u'N']],
 [[u'DH', u'AH0']],
 [[u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']]]

In [22]:
def syllable_list(check):
    syl_list = []
    for val in check.itervalues():
        syl_list.extend(val)
    print syl_list, 'check \n'
    return syl_list

tt = syllable_list(check)

[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N'], [u'S', u'P', u'AA1', u'T'], [u'IH0', u'N'], [u'DH', u'AH0'], [u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']] check 



In [23]:
def score_size(check):
    size = 0
    for word in check.itervalues():
        size += len(word)
    return np.zeros((size,size))

In [24]:
def sound_intersect(phonetic1, phonetic2):
    '''
    Input: Two phonetic to be compared
    Output: sound found in both phonetic

    Find common sounds in the two phonetics
    '''
    return list(set(phonetic1) & set(phonetic2))

In [25]:
def is_vowel(text):
    '''
    Returns true if a character is a vowel
    '''
    vowels = 'aeiouAEIOU'
    return [True if char in vowels else False for char in text]

In [26]:
def is_num(char = 'x'):
    '''
    Returns true if a character is a number
    '''
    return char.isdigit()

In [27]:
def vowel_score(sound):
    '''
    Input: Vowel phone
    Output: Vowel phone score

    Assigns score to a common phone vowel bewteen 2 words based on their stress.
    '''
    if sound[-1] > 0:
        return len(sound) * 3
    else:
        return len(sound) + 2

In [28]:
def consonant_score(phonetic1, phonetic2, sound):
    '''
    Input: Consonant phone
    Output: Consonant phone score

    Assigns consonant phone score based on whether it is prefix or suffix
    '''
#     print sound
    sound_loc = prefix_check(phonetic1, phonetic2, sound)
    if sound_loc == 1:
        return 1
    elif sound_loc == 2:
        return 2.5
    else:
        print 'content 0'
        return 0

In [29]:
# This works

def prefix_check(phonetic1, phonetic2, sound):
    '''
    Input: Two phonetic sounds with common element sound
    Output: Bool if the sound is prefix

    Checks whether a consonant is a prefix or suffix or didn't match either
    1 - prefix, 2 - suffix, 0 - mix-match
    '''
    print '\nsound check: {}'.format(sound)
    ph1_len = [len(cons) for cons in phonetic1]
    ph2_len = [len(cons) for cons in phonetic2]
    ind1 = phonetic1.index(sound)
    ind2 = phonetic2.index(sound)
    if (ind1 < ph1_len.index(max(ph1_len))) and (ind2 < ph2_len.index(max(ph2_len))):
        return 1
    elif (ind1 > ph1_len.index(max(ph1_len))) and (ind2 > ph2_len.index(max(ph2_len))):
        return 2
    else:
        return 0

In [30]:
def final_score(phonetic1, phonetic2, common_sounds):
    '''
    Input: 2 phonetic and list of common sounds between two phonetics
    Output: Score based on phonetic sound

    Calculates score for the common sounds
    '''
    points = 0
    for sound in common_sounds:
        if len(sound) > 1:
            points += vowel_score(sound)
        else:
            points += consonant_score(phonetic1, phonetic2, sound)
    return points

In [31]:
import copy 
def similarity_mat(syllable_dict):
    syl_list1 = syllable_list(syllable_dict)
    syl_list2 = copy.deepcopy(syl_list1)
    score = score_size(syllable_dict)
    for ind1, val1 in enumerate(syl_list1):
        for ind2, val2 in enumerate(syl_list2):
            if val1 != val2:
                common_sound = sound_intersect(val1, val2)
                if len(common_sound) > 0:
                    points = final_score(val1, val2, common_sound)
                    score[ind1][ind2] = points
                else:
                    score[ind1][ind2] = 0
            else:
                score[ind1][ind2] = -99
    return score, syl_list1

In [32]:
score, col = similarity_mat(check)

[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N'], [u'S', u'P', u'AA1', u'T'], [u'IH0', u'N'], [u'DH', u'AH0'], [u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']] check 


sound check: T
content 0

sound check: N

sound check: N

sound check: T
content 0

sound check: N

sound check: N

sound check: N

sound check: N


In [33]:
score

array([[-99. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. , -99. ,   0. ,   9. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. , -99. ,   0. ,   2.5,   9. ,   0. ,   0. ,   0. ,
         11.5],
       [  0. ,   9. ,   0. , -99. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   2.5,   0. , -99. ,   0. ,   0. ,   0. ,   0. ,
          2.5],
       [  0. ,   0. ,   9. ,   0. ,   0. , -99. ,   0. ,   0. ,   0. ,   9. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. , -99. ,   0. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. , -99. ,   0. ,   0. ],
       [  0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. ,   0. , -99. ,   0. ],
       [  0. ,   0. ,  11.5,   0. ,   2.5,   9. ,   0. ,   0. ,   0. , -99. ]])

In [34]:
score == score.T

array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]], dtype=bool)

In [35]:
print score[2][4]
print score[4][2]

2.5
2.5


In [36]:
lines = OrderedDict([('got', [[u'G', u'AA1', u'T']]),
             ('me', [[u'M', u'IY1']]),
             ('breathing', [[u'B', u'R', u'IY1'], [u'DH', u'IH0', u'NG']]),
             ('with', [[u'W', u'IH1', u'DH']]),
             ('dragons', [[u'D', u'R', u'AE1'], [u'G', u'AH0', u'N', u'Z']]),
             ('i', [[u'AY1']]),
             ('crack', [[u'K', u'R', u'AE1', u'K']]),
             ('the', [[u'DH', u'AH0']]),
             ('egg', [[u'EH1', u'G']]),
             ('in', [[u'IH0', u'N']]),
             ('your', [[u'Y', u'AO1', u'R']]),
             ('basket', [[u'B', u'AE1', u'S'], [u'K', u'AH0', u'T']]),
             ('you', [[u'Y', u'UW1']]),
             ('bastard', [[u'B', u'AE1', u'S'], [u'T', u'ER0', u'D']]),
             ("'m", [[u'AH0', u'M']]),
             ('marilyn',
              [[u'M', u'EH1'], [u'R', u'AH0'], [u'L', u'AH0', u'N']]),
             ('manson', [[u'M', u'AE1', u'N'], [u'S', u'AH0', u'N']]),
             ('madness', [[u'M', u'AE1', u'D'], [u'N', u'AH0', u'S']]),
             ('now', [[u'N', u'AW1']]),
             ('just', [[u'JH', u'AH1', u'S', u'T']]),
             ('imagine', [[u'IH2'], [u'M', u'AE1'], [u'JH', u'AH0', u'N']]),
             ('magic', [[u'M', u'AE1'], [u'JH', u'IH0', u'K']]),
             ('light', [[u'L', u'AY1', u'T']]),
             ('to', [[u'T', u'UW1']]),
             ('asses', [[u'AE1'], [u'S', u'AH0', u'Z']]),
             ('do', [[u'D', u'UW1']]),
             ('ask', [[u'AE1', u'S', u'K']]),
             ('for', [[u'F', u'AO1', u'R']]),
             ('favorite', [[u'F', u'EY1'], [u'V', u'ER0'], [u'IH0', u'T']]),
             ('rapper', [[u'R', u'AE1'], [u'P', u'ER0']])])

print lines.keys()

['got', 'me', 'breathing', 'with', 'dragons', 'i', 'crack', 'the', 'egg', 'in', 'your', 'basket', 'you', 'bastard', "'m", 'marilyn', 'manson', 'madness', 'now', 'just', 'imagine', 'magic', 'light', 'to', 'asses', 'do', 'ask', 'for', 'favorite', 'rapper']


In [37]:
for val in lines.itervalues():
    print val

[[u'G', u'AA1', u'T']]
[[u'M', u'IY1']]
[[u'B', u'R', u'IY1'], [u'DH', u'IH0', u'NG']]
[[u'W', u'IH1', u'DH']]
[[u'D', u'R', u'AE1'], [u'G', u'AH0', u'N', u'Z']]
[[u'AY1']]
[[u'K', u'R', u'AE1', u'K']]
[[u'DH', u'AH0']]
[[u'EH1', u'G']]
[[u'IH0', u'N']]
[[u'Y', u'AO1', u'R']]
[[u'B', u'AE1', u'S'], [u'K', u'AH0', u'T']]
[[u'Y', u'UW1']]
[[u'B', u'AE1', u'S'], [u'T', u'ER0', u'D']]
[[u'AH0', u'M']]
[[u'M', u'EH1'], [u'R', u'AH0'], [u'L', u'AH0', u'N']]
[[u'M', u'AE1', u'N'], [u'S', u'AH0', u'N']]
[[u'M', u'AE1', u'D'], [u'N', u'AH0', u'S']]
[[u'N', u'AW1']]
[[u'JH', u'AH1', u'S', u'T']]
[[u'IH2'], [u'M', u'AE1'], [u'JH', u'AH0', u'N']]
[[u'M', u'AE1'], [u'JH', u'IH0', u'K']]
[[u'L', u'AY1', u'T']]
[[u'T', u'UW1']]
[[u'AE1'], [u'S', u'AH0', u'Z']]
[[u'D', u'UW1']]
[[u'AE1', u'S', u'K']]
[[u'F', u'AO1', u'R']]
[[u'F', u'EY1'], [u'V', u'ER0'], [u'IH0', u'T']]
[[u'R', u'AE1'], [u'P', u'ER0']]


In [38]:
check.values()

[[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']],
 [[u'S', u'P', u'AA1', u'T']],
 [[u'IH0', u'N']],
 [[u'DH', u'AH0']],
 [[u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']]]

In [39]:
a = check.values()[1][0]
b = check.values()[0][2]
print a, b

[u'S', u'P', u'AA1', u'T'] [u'T', u'AH0', u'N']


In [186]:
c =sound_intersect(a, b)
print c

[u'T']


In [191]:
s = final_score(a, b, c)
s


 sound check: T


0

In [169]:
v = vowel_score(c[0])
v

9

In [None]:
a

In [194]:
## Test for multiple lines entries

In [201]:
check

OrderedDict([('forgotten',
              [[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']]),
             ('spot', [[u'S', u'P', u'AA1', u'T']]),
             ('in', [[u'IH0', u'N']]),
             ('the', [[u'DH', u'AH0']]),
             ('caribbean',
              [[u'K', u'EH0'],
               [u'R', u'IH1'],
               [u'B', u'IY2'],
               [u'AH0', u'N']])])

In [217]:
test = check.values()
test.extend(test)
for a in test:
    print a[0]

[u'F', u'ER0']
[u'S', u'P', u'AA1', u'T']
[u'IH0', u'N']
[u'DH', u'AH0']
[u'K', u'EH0']
[u'F', u'ER0']
[u'S', u'P', u'AA1', u'T']
[u'IH0', u'N']
[u'DH', u'AH0']
[u'K', u'EH0']


In [126]:
col

[[u'F', u'ER0'],
 [u'G', u'AA1'],
 [u'T', u'AH0', u'N'],
 [u'S', u'P', u'AA1', u'T'],
 [u'IH0', u'N'],
 [u'DH', u'AH0'],
 [u'K', u'EH0'],
 [u'R', u'IH1'],
 [u'B', u'IY2'],
 [u'AH0', u'N']]

In [127]:
score

array([[-99.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0., -99.,   0.,   9.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0., -99.,   1.,   2.,   9.,   0.,   0.,   0.,  11.],
       [  0.,   9.,   2., -99.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   1.,   0., -99.,   0.,   0.,   0.,   0.,   2.],
       [  0.,   0.,   9.,   0.,   0., -99.,   0.,   0.,   0.,   9.],
       [  0.,   0.,   0.,   0.,   0.,   0., -99.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0., -99.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0., -99.,   0.],
       [  0.,   0.,  10.,   0.,   2.,   9.,   0.,   0.,   0., -99.]])

In [74]:
score.shape

(10, 10)

In [128]:
tt = col[3]
tt

[u'S', u'P', u'AA1', u'T']

In [105]:
see = [len(ph) for ph in tt]
see

[1, 1, 3, 1]

In [134]:
for s in c:
    print a, prefix_check(a, b, c[1])
    print b, prefix_check(a, b, c[1])

[u'T', u'AH0', u'N'] 2
[u'AH0', u'N'] 2
[u'T', u'AH0', u'N'] 2
[u'AH0', u'N'] 2


In [131]:
c

[u'AH0', u'N']

In [40]:
tt

[[u'F', u'ER0'],
 [u'G', u'AA1'],
 [u'T', u'AH0', u'N'],
 [u'S', u'P', u'AA1', u'T'],
 [u'IH0', u'N'],
 [u'DH', u'AH0'],
 [u'K', u'EH0'],
 [u'R', u'IH1'],
 [u'B', u'IY2'],
 [u'AH0', u'N']]

In [42]:
a = ['a', 'v', 'c', 'd']
b = ['c', 'v','a', 'd']
print a, b

['a', 'v', 'c', 'd'] ['c', 'v', 'a', 'd']


In [43]:
c = list(set(a) & set(b))
c

['a', 'c', 'd', 'v']

In [45]:
print a.index(c[0])
print b.index(c[0])

0
2


In [45]:
testing = np.array([[1,2,3],[4,5,6]])
testing.shape

(2, 3)

In [47]:
testing[0][2] = 3**2
print testing

[[1 2 9]
 [4 5 6]]


In [50]:
n = len(check.values())
sounds = check.vl()
print sounds
for ind, val in enumerate(sounds):
    print ind, val
    while ind < n:
        for sound in val:
            print sound, 'check'
            comp = sounds[ind+1]
            for v in comp:
                print sound, v

<generator object iteritems at 0x7f962a325870>
0 ('forgotten', [[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']])
forgotten check


TypeError: 'generator' object has no attribute '__getitem__'

In [None]:
# n = len(check.values())
# for ind, val in enumerate(check.values()):
#     print ind, val, '\n\n\n'
#     while ind < n:
#         for sound in val:
#             for comp in check.values()[ind+1]
#             print list(set(sound) & set()

In [None]:
print list(set(check.values()[0]) & set(check.values()[4]))

In [None]:
n = len(check.values())
n

In [240]:
from ScoreMechanism import ScoreMechanism
from PrepareText import PrepareText

In [241]:
text = PrepareText('lyrics/test.md')
score = ScoreMechanism(text.lyrics_tokenized, text.syllable_dict)

list index out of range


In [243]:
score.col

[[u'F', u'ER0'],
 [u'G', u'AA1'],
 [u'T', u'AH0', u'N'],
 [u'S', u'P', u'AA1', u'T'],
 [u'IH0', u'N']]

In [242]:
score.adjacency_matrix

array([[ 0. ,  0. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  9. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  2.5],
       [ 0. ,  9. ,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. ,  0. ,  0. ]])

In [244]:
print score.col[2]
print score.col[4]

[u'T', u'AH0', u'N']
[u'IH0', u'N']


In [258]:
a = score.col[2]
b = score.col[4]
c = sound_intersect(a, b)
print a
print b
print c

[u'T', u'AH0', u'N']
[u'IH0', u'N']
[u'N']


In [260]:
print final_score(a,b,c)
print final_score(b,a,c)


sound check: N
2.5

sound check: N
2.5


In [261]:
print a
print b
print c
# print prefix_check(a,b,c)
print prefix_check(b,a,c)

[u'T', u'AH0', u'N']
[u'IH0', u'N']
[u'N']

sound check: [u'N']


ValueError: [u'N'] is not in list

In [262]:
# Web Scrapping
import requests
from bs4 import BeautifulSoup

In [263]:

r = requests.get('http://genius.com/artists/Lin-manuel-miranda')
soup = BeautifulSoup(r.text, 'html.parser')


In [269]:
# links = []
a = soup.find_all('class' = 'mini_card')
a

SyntaxError: keyword can't be an expression (<ipython-input-269-05957ac90bf4>, line 2)

## Reconstructing words

In [270]:
import pronouncing as pr

In [271]:
text = 'forgotten spot in the caribbean'
text

'forgotten spot in the caribbean'

In [275]:
phones = []
for word in text.split():
    phones.append(pr.phones_for_word(word))

In [277]:
phones[0]

[u'F ER0 G AA1 T AH0 N', u'F AO0 R G AA1 T AH0 N']

In [33]:
for key, val in check.iteritems():
    print key, val

forgotten [u'T', u'AH0', u'N']
spot [u'S', u'P', u'AA1', u'T']
in [u'IH0', u'N']
the [u'DH', u'AH0']
caribbean [u'AH0', u'N']


In [32]:
for key, val in check.iteritems():
    for ind, sound in enumerate(val):
        print ind, sound

0 T
1 AH0
2 N
0 S
1 P
2 AA1
3 T
0 IH0
1 N
0 DH
1 AH0
0 AH0
1 N


In [19]:
a = check.values()[0]
a

[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']]

In [16]:
for ind, val in enumerate(a):
    while ind < a.indext

N


In [22]:
test = OrderedDict()
for key, val in check.iteritems():
    for ind, sound in enumerate(val):
        if ind < val.index(val[-1]):
            print ind, val[ind+1][0]

0 G
1 T
2 G
0 R
1 B
2 AH0
3 R


In [25]:
test = OrderedDict()
for key, val in check.iteritems():
    for ind, sound in enumerate(val):
        if (ind < len(val)) and (len(val[ind+1][0]) == 1):
            print sound
#             sound.extend(val[ind+1][0])
            print sound
#     test.update({key:val})    
    
test

[u'F', u'ER0', u'G']
[u'F', u'ER0', u'G']
[u'G', u'AA1', u'T']
[u'G', u'AA1', u'T']
[u'T', u'AH0', u'N', u'G']
[u'T', u'AH0', u'N', u'G']
G
G
T
T
G
G
G
G
T
T
G
G
G
G
T
T


IndexError: list index out of range

In [286]:
def wrapped_vowels(check):
    '''
    Ensures that that a vowel is wrapped by consonants
    '''
#     temp_syl = clean_syllables()
    temp_dict = OrderedDict()
    for key, val in check.iteritems():
        for ind, sound in enumerate(val):
            temp_list = []
            while ind < val.index(val[-1]):
                if len(val[ind+1][0]) == 1:
                    val.extend(val[ind+1][0])
                else:
                    continue
        temp_dict.update({key:val})
    return temp_dict

test = wrapped_vowels(check)

KeyboardInterrupt: 

In [284]:
for key, val in test.iteritems():
    print key, val

AttributeError: 'list' object has no attribute 'iteritems'

In [75]:
from ScoreMechanism import ScoreMechanism

score = ScoreMechanism('lyrics/forgot.md')
for key, val in score.syllable_dict.iteritems():
    print key, val

forgotten [[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']]
spot [[u'S', u'P', u'AA1', u'T']]
in [[u'IH0', u'N']]
the [[u'DH', u'AH0']]
caribbean [[u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']]


In [165]:
check = copy.deepcopy(score.syllable_dict)
check

OrderedDict([('forgotten',
              [[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']]),
             ('spot', [[u'S', u'P', u'AA1', u'T']]),
             ('in', [[u'IH0', u'N']]),
             ('the', [[u'DH', u'AH0']]),
             ('caribbean',
              [[u'K', u'EH0'],
               [u'R', u'IH1'],
               [u'B', u'IY2'],
               [u'AH0', u'N']])])

In [127]:
for key, val in check.iteritems():
    for ind, sound in enumerate(val):
#         print val.index(sound), ind
        temp = copy.deepcopy(sound)
        if ind < (len(val) - 1):
            print 'first condition ind: {}, {}'.format(val.index(sound), (len(val)-1))
        else:
            print 'second condition ind: {}, {}'.format(val.index(sound), (len(val) -1))

first condition ind: 0, 2
first condition ind: 1, 2
second condition ind: 2, 2
second condition ind: 0, 0
second condition ind: 0, 0
second condition ind: 0, 0
first condition ind: 0, 3
first condition ind: 1, 3
first condition ind: 2, 3
second condition ind: 3, 3


In [113]:
for ind, ph in enumerate(check.itervalues()):
    print ind, ph

0 [[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']]
1 [[u'S', u'P', u'AA1', u'T']]
2 [[u'IH0', u'N']]
3 [[u'DH', u'AH0']]
4 [[u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']]


In [152]:
for key, val in check.iteritems():
    for ind, sound in enumerate(val):
        temp = copy.deepcopy(sound)
        if ind < (len(val) - 1):
            temp.extend(val[ind+1][0])
            print 'first: {}, {}'.format(temp, ind)
        else:
            print 'second: {} {}'.format(temp, ind)
    d.update({key:temp})
    
# print d

first: [u'F', u'ER0', u'G'], 0
first: [u'G', u'AA1', u'T'], 1
second: [u'T', u'AH0', u'N'] 2
second: [u'S', u'P', u'AA1', u'T'] 0
second: [u'IH0', u'N'] 0
second: [u'DH', u'AH0'] 0
first: [u'K', u'EH0', u'R'], 0
first: [u'R', u'IH1', u'B'], 1
first: [u'B', u'IY2', u'A', u'H', u'0'], 2
second: [u'AH0', u'N'] 3


In [156]:
check['forgotten']

[[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']]

In [164]:
check

OrderedDict([('forgotten',
              [[u'F', u'ER0', u'F', u'G', u'G'],
               [u'G', u'AA1', u'G', u'T', u'T'],
               [u'T', u'AH0', u'N', u'T']]),
             ('spot', [[u'S', u'P', u'AA1', u'T', u'S']]),
             ('in', [[u'IH0', u'N', u'IH0']]),
             ('the', [[u'DH', u'AH0', u'DH']]),
             ('caribbean',
              [[u'K', u'EH0', u'K', u'R', u'R'],
               [u'R', u'IH1', u'R', u'B', u'B'],
               [u'B', u'IY2', u'B', u'AH0', u'AH0'],
               [u'AH0', u'N', u'AH0']])])

## NEXT LINE WORKS FOR WRAPPING VOWELS

In [169]:
# Finally working

d2 = copy.deepcopy(check)
for key, val in check.iteritems():
    for i, syl in enumerate(val[1:], 1):
        d2[key][i-1].append(syl[0])

In [170]:
d2

OrderedDict([('forgotten',
              [[u'F', u'ER0', u'G'],
               [u'G', u'AA1', u'T'],
               [u'T', u'AH0', u'N']]),
             ('spot', [[u'S', u'P', u'AA1', u'T']]),
             ('in', [[u'IH0', u'N']]),
             ('the', [[u'DH', u'AH0']]),
             ('caribbean',
              [[u'K', u'EH0', u'R'],
               [u'R', u'IH1', u'B'],
               [u'B', u'IY2', u'AH0'],
               [u'AH0', u'N']])])

In [151]:
from collections import defaultdict

d = defaultdict(list)
for key, val in check.iteritems():
    for ind, sound in enumerate(val):
#         n = len(val[ind+1][0]
        if ind <= (len(val) - 1):
            d[key].append(sound)                  
d

defaultdict(list,
            {'caribbean': [[u'K', u'EH0'],
              [u'R', u'IH1'],
              [u'B', u'IY2'],
              [u'AH0', u'N']],
             'forgotten': [[u'F', u'ER0'],
              [u'G', u'AA1'],
              [u'T', u'AH0', u'N']],
             'in': [[u'IH0', u'N']],
             'spot': [[u'S', u'P', u'AA1', u'T']],
             'the': [[u'DH', u'AH0']]})

In [114]:
from collections import OrderedDict

d = OrderedDict()
for key, val in check.iteritems():
    for ind, sound in enumerate(val):
        temp = copy.deepcopy(sound)
        if ind < (len(val) - 1) and (val[ind+1][0] == 1):
            temp.extend(val[ind+1][0])
            d.update({key:temp})
        else:
             d.update({key:sound})     
                
d

OrderedDict([('forgotten', [u'T', u'AH0', u'N']),
             ('spot', [u'S', u'P', u'AA1', u'T']),
             ('in', [u'IH0', u'N']),
             ('the', [u'DH', u'AH0']),
             ('caribbean', [u'AH0', u'N'])])

less than condition: [u'F', u'ER0']
equal condition: [u'G', u'AA1']
equal condition: [u'T', u'AH0', u'N']
less than condition: [u'S', u'P', u'AA1', u'T']
less than condition: [u'IH0', u'N']
less than condition: [u'DH', u'AH0']
less than condition: [u'K', u'EH0']
equal condition: [u'R', u'IH1']
equal condition: [u'B', u'IY2']
equal condition: [u'AH0', u'N']


In [49]:
len(check.values()[0])

3




0 [[u'F', u'ER0'], [u'G', u'AA1'], [u'T', u'AH0', u'N']]
1 [[u'S', u'P', u'AA1', u'T']]
2 [[u'IH0', u'N']]
3 [[u'DH', u'AH0']]
4 [[u'K', u'EH0'], [u'R', u'IH1'], [u'B', u'IY2'], [u'AH0', u'N']]


In [71]:
for ind, ph in enumerate(check.itervalues()):
    for ind2, sounds in enumerate(ph):
        print ind2, sounds

0 [u'F', u'ER0', u'T', u'T']
1 [u'G', u'AA1', u'T', u'T']
2 [u'T', u'AH0', u'N']
0 [u'S', u'P', u'AA1', u'T']
0 [u'IH0', u'N']
0 [u'DH', u'AH0']
0 [u'K', u'EH0']
1 [u'R', u'IH1']
2 [u'B', u'IY2']
3 [u'AH0', u'N']


In [70]:
import copy

for ind, ph in enumerate(check.itervalues()):
    for ind2, sounds in enumerate(ph):
        temp = copy.deepcopy(sound)
        if (ind2 < len(ph) - 1):
#             temp.extend(ph[ind+2][0])
#             print temp, 'first'
            print ind2, sounds, 'first', ph[ind2+1][0]
        else:
            print ind2, sounds, 'second'

0 [u'F', u'ER0', u'T', u'T'] first G
1 [u'G', u'AA1', u'T', u'T'] first T
2 [u'T', u'AH0', u'N'] second
0 [u'S', u'P', u'AA1', u'T'] second
0 [u'IH0', u'N'] second
0 [u'DH', u'AH0'] second
0 [u'K', u'EH0'] first R
1 [u'R', u'IH1'] first B
2 [u'B', u'IY2'] first AH0
3 [u'AH0', u'N'] second


In [187]:
from ScoreMechanism import ScoreMechanism

In [188]:
score = ScoreMechanism('lyrics/forgot.md')

In [189]:
score.aphabet_dict

OrderedDict([('forgotten', u'F ER0 G AA1 T AH0 N'),
             ('spot', u'S P AA1 T'),
             ('in', u'IH0 N'),
             ('the', u'DH AH0'),
             ('caribbean', u'K EH0 R IH1 B IY2 AH0 N')])

In [194]:
def prRed(prt): print("\033[91m {}\033[00m" .format(prt))
def prGreen(prt): print("\033[92m {}\033[00m" .format(prt))
def prYellow(prt): print("\033[93m {}\033[00m" .format(prt))
def prLightPurple(prt): print("\033[94m {}\033[00m" .format(prt))
def prPurple(prt): print("\033[95m {}\033[00m" .format(prt))
def prCyan(prt): print("\033[96m {}\033[00m" .format(prt))
def prLightGray(prt): print("\033[97m {}\033[00m" .format(prt))
def prBlack(prt): print("\033[98m {}\033[00m" .format(prt))

In [195]:
prGreen("Hello world")

[92m Hello world[00m


In [205]:
# from TextAssemble import TextAssemble
from IPython.core.display import HTML, display

[(u'got', 99),
 (u'ten', 9),
 (u'spot', 99),
 (u'in', 99),
 (u'the', 99),
 ('ca', 99),
 ('rib', 99),
 ('be', 99),
 ('an', 9)]

In [210]:
print 'This is not a bold statement.'
print '<html><em>But this is a bold statement.</em></html>'

This is not a bold statement.
<html><em>But this is a bold statement.</em></html>


In [225]:
from colorama import *
init(autoreset=True)
print Fore.BLACK + 'some' + Fore.GREEN + 'red' + Fore.YELLOW + 'text'
print Style.BRIGHT + Fore.RED + 'some bright red text'

colores = ['BLACK', 'BLUE', 'CYAN', 'GREEN', 'MAGENTA', 'RED', 'YELLOW']
print colores

[30msome [32mred [33mtext
[1m[31msome bright red text


In [319]:
colores = ['BLACK', 'BLUE', 'CYAN', 'GREEN', 'MAGENTA', 'RED', 'YELLOW']
print colores

['BLACK', 'BLUE', 'CYAN', 'GREEN', 'MAGENTA', 'RED', 'YELLOW']


In [266]:
test =      ([(u'for', 99),
             (u'got', 99),
             (u'ten', 9),
             (u'spot', 9),
             (u'in', 99),
             (u'the', 99),
             ('ca', 99),
             ('rib', 99),
             ('be', 99),
             ('an', 9)])

test

[(u'for', 99),
 (u'got', 99),
 (u'ten', 9),
 (u'spot', 99),
 (u'in', 99),
 (u'the', 99),
 ('ca', 99),
 ('rib', 99),
 ('be', 99),
 ('an', 9)]

In [267]:
syl_dict = OrderedDict([('forgotten', [u'for', u'got', u'ten']),
             ('spot', [u'spot']),
             ('in', [u'in']),
             ('the', [u'the']),
             ('caribbean', ['ca', 'rib', 'be', 'an'])])

syl_dict

OrderedDict([('forgotten', [u'for', u'got', u'ten']),
             ('spot', [u'spot']),
             ('in', [u'in']),
             ('the', [u'the']),
             ('caribbean', ['ca', 'rib', 'be', 'an'])])

In [274]:
check = OrderedDict()
for k, v in syl_dict.iteritems():
    check.update({k:len(v)})

check

OrderedDict([('forgotten', 3),
             ('spot', 1),
             ('in', 1),
             ('the', 1),
             ('caribbean', 4)])

In [295]:
word_ind = [0]
for key, leng in check.iteritems():
#     print key, leng
    print test[word_ind:leng]
    word_ind += leng
    

TypeError: slice indices must be integers or None or have an __index__ method

In [246]:
test

[(u'got', 99),
 (u'ten', 9),
 (u'spot', 99),
 (u'in', 99),
 (u'the', 99),
 ('ca', 99),
 ('rib', 99),
 ('be', 99),
 ('an', 9)]

In [251]:
test[5:9]

[('ca', 99), ('rib', 99), ('be', 99), ('an', 9)]

In [255]:
cp = [0]
cp.append(3)
cp.append([3,3+2])
cp

[0, 3, [3, 5]]

In [256]:
print test, check

[(u'got', 99), (u'ten', 9), (u'spot', 99), (u'in', 99), (u'the', 99), ('ca', 99), ('rib', 99), ('be', 99), ('an', 9)] OrderedDict([('forgotten', 3), ('spot', 1), ('in', 1), ('the', 1), ('caribbean', 4)])


In [268]:
word_in = 0
lst = []
for syl, wlen in check.iteritems():
    print test[word_in:(word_in+wlen)]
    word_in += wlen

word_in

[(u'for', 99), (u'got', 99), (u'ten', 9)]
[(u'spot', 99)]
[(u'in', 99)]
[(u'the', 99)]
[('ca', 99), ('rib', 99), ('be', 99), ('an', 9)]


10

In [265]:
test

[(u'got', 99),
 (u'ten', 9),
 (u'spot', 99),
 (u'in', 99),
 (u'the', 99),
 ('ca', 99),
 ('rib', 99),
 ('be', 99),
 ('an', 9)]

In [294]:
type(test[0])

tuple

In [269]:
def word_syl_count(phonetic_syl_dict):
    syl_counts = OrderedDict()
    for key, val in phonetic_syl_dict.iteritems():
        syl_counts.update({key:len(val)})
    return syl_counts

In [288]:
def syl_combine(syl_counts):
    word_ind = 0
    test2 = OrderedDict()
    syl_counts = word_syl_count(syl_counts)
    for key, syl_len in syl_counts.iteritems():
        temp = test[word_ind:(word_ind+syl_len)]
        test2.update({key:temp})
        word_ind += syl_len
    return test2

In [289]:
counts = word_syl_count(syl_dict)
print counts

OrderedDict([('forgotten', 3), ('spot', 1), ('in', 1), ('the', 1), ('caribbean', 4)])


In [296]:
LetsSee = syl_combine(syl_dict)
for k, v in LetsSee.iteritems():
    print k, v

forgotten [(u'for', 99), (u'got', 99), (u'ten', 9)]
spot [(u'spot', 99)]
in [(u'in', 99)]
the [(u'the', 99)]
caribbean [('ca', 99), ('rib', 99), ('be', 99), ('an', 9)]


In [297]:
testing = LetsSee.values()
print testing

[[(u'for', 99), (u'got', 99), (u'ten', 9)], [(u'spot', 99)], [(u'in', 99)], [(u'the', 99)], [('ca', 99), ('rib', 99), ('be', 99), ('an', 9)]]


In [298]:
for i in testing:
    print i

[(u'for', 99), (u'got', 99), (u'ten', 9)]
[(u'spot', 99)]
[(u'in', 99)]
[(u'the', 99)]
[('ca', 99), ('rib', 99), ('be', 99), ('an', 9)]


In [359]:
from colorama import *

black = Fore.BLACK
colors = [Fore.RED, Fore.BLUE, Fore.CYAN, Fore.GREEN, Fore.YELLOW, Fore.MAGENTA]
n = 2

# init(autoreset=True)
print Fore.BLACK + 'some ' + Fore.GREEN + 'red ' + Fore.YELLOW + 'text '
print Style.BRIGHT + Fore.RED + 'some bright red text'

[30msome [32mred [33mtext 
[1m[31msome bright red text


In [363]:
testing

[[(u'for', 99), (u'got', 99), (u'ten', 9)],
 [(u'spot', 99)],
 [(u'in', 99)],
 [(u'the', 99)],
 [('ca', 99), ('rib', 99), ('be', 99), ('an', 9)]]

In [366]:
black = Fore.BLACK
colors = [Fore.RED, Fore.BLUE, Fore.CYAN, Fore.GREEN, Fore.YELLOW, Fore.MAGENTA]
n = 2 - 1


work = OrderedDict()
for key, val in LetsSee.iteritems():
    temp = []
    for syl in val:
        ph, cl = syl[0], syl[1]
        if cl == 99:
            temp.append(Fore.BLACK + ph)
            print  Fore.BLACK + ph
        else:
#             color_ind = clusters.index(9)
            color_ind = 0
            print colors[color_ind] + 'something'


[30mfor
[30mgot
[31msomething
[30mspot
[30min
[30mthe
[30mca
[30mrib
[30mbe
[31msomething


In [307]:
lyrics = [['forgotten', 'spot', 'in', 'the', 'caribbean']]
lyrics

[['forgotten', 'spot', 'in', 'the', 'caribbean']]

In [312]:
for line in text.lyrics_tokenized:
    for word in line:
        syls = text.grouped_syl
        print word
#         print LetsSee[i]

forgotten
spot
in
the
caribbean


In [318]:
for line in lyrics:
    for word in line:
        syls = LetsSee[word]
        print syls
#     print line

[(u'for', 99), (u'got', 99), (u'ten', 9)]
[(u'spot', 99)]
[(u'in', 99)]
[(u'the', 99)]
[('ca', 99), ('rib', 99), ('be', 99), ('an', 9)]


In [5]:
# from TextAssemble import TextAssemble

ImportError: No module named mcl_clustering

In [4]:
# from mcl_clustering import mcl