In [1]:
import os
os.chdir('..')
import numpy as np
from src.graph.Graph import UndirectedGraph
from src.algorithms.random_walk import RandomWalker
from src.algorithms.redundant_visitations import IRT


np.random.seed(1)

In [2]:
g = UndirectedGraph()
walker_texas_ranger = RandomWalker(g)
for i in range(100):
    g.add_node(str(i))
for i in range(10):
    for j in range(10):
        if i != j:
            g.add_edge(str(i), str(j), np.random.rand(1))
path = walker_texas_ranger.run(10)
rv = IRT.calculate(path)
print(path)
print(rv)

['5', '6', '3', '5', '8', '3', '9', '5', '0', '9']
[(None, '5', 0), ('5', '6', 0), ('6', '3', 0), ('3', '8', 1), ('8', '9', 1), ('9', '0', 1)]


In [3]:
# import numpy as np
# import os
# os.chdir('..')
from src.graph.semantic_network import SemanticNetwork
from src.text.text_wrangler import Corpus
from gensim.models import Word2Vec



In [4]:
shakespeare = Corpus("docs/shakespeare.txt")
model = Word2Vec(shakespeare.sentence_matrix, size = 120,
                 window = 5, min_count=5, workers=8, sg=1)

In [5]:
network = SemanticNetwork(embeddings=model.wv.vectors, aligned_keys=model.wv.index2word)

In [6]:
for i in range(1000):
    model.train(shakespeare.sentence_matrix, total_examples=len(shakespeare.sentence_matrix),
                epochs=1, compute_loss=True)
    print("Round {} ==================".format(i))
    network.update(em_proportion=1, g_proportion=1, include_set=shakespeare.nouns, stop_set=shakespeare.stopwords, thresh=0.8, verbose=True)

Updated 4187668 edges
Updated 2375388 edges
Updated 1161502 edges
Updated 505136 edges
Updated 214866 edges
Updated 97938 edges
Updated 51382 edges
Updated 31360 edges
Updated 21554 edges
Updated 16324 edges
Updated 13492 edges
Updated 11530 edges
Updated 10252 edges
Updated 9370 edges
Updated 8686 edges
Updated 8224 edges
Updated 7812 edges
Updated 7444 edges
Updated 7106 edges
Updated 6830 edges
Updated 6578 edges
Updated 6396 edges
Updated 6178 edges
Updated 6024 edges
Updated 5862 edges
Updated 5702 edges
Updated 5576 edges
Updated 5484 edges
Updated 5374 edges
Updated 5278 edges
Updated 5196 edges
Updated 5122 edges
Updated 5058 edges
Updated 4998 edges
Updated 4952 edges
Updated 4896 edges
Updated 4858 edges
Updated 4832 edges
Updated 4786 edges
Updated 4754 edges
Updated 4732 edges
Updated 4700 edges
Updated 4672 edges
Updated 4654 edges
Updated 4628 edges
Updated 4606 edges
Updated 4590 edges
Updated 4572 edges
Updated 4562 edges
Updated 4550 edges
Updated 4526 edges
Updated 45

KeyboardInterrupt: 

In [7]:
wtr = RandomWalker(network.graph)

In [8]:
path = wtr.run(1000)
path

['infirmity',
 'provision',
 'loathes',
 'infer',
 'foil',
 'profession',
 'accusation',
 'hallow',
 'penalty',
 'quillets',
 'capacity',
 'taunt',
 'commonweal',
 'reynaldo',
 'carlisle',
 'amiens',
 'fortinbras',
 'bushy',
 'surveyor',
 'cinna',
 'messala',
 'portia',
 'iachimo',
 'francisco',
 'cimber',
 'francisco',
 'paulina',
 'aeneas',
 'miranda',
 'morocco',
 'banquo',
 'menteith',
 'discovers',
 'sleeper',
 'sennet',
 'albans',
 'snug',
 'calchas',
 'brothel',
 'batter',
 'quiver',
 'misfortune',
 'griev',
 'suppos',
 'canon',
 'fort',
 'tenour',
 'whiteness',
 'cricket',
 'proclamation',
 'refrain',
 'alcides',
 'resistance',
 'welshman',
 'pilgrim',
 'bandy',
 'precept',
 'recorder',
 'ivy',
 'doctrine',
 'expo',
 'lump',
 'humane',
 'indignity',
 'mischance',
 'accord',
 'blanket',
 'restless',
 'obeys',
 'inhabit',
 'deface',
 'scoff',
 'tyrus',
 'travell',
 'sandy',
 'drowns',
 'likelihood',
 'trap',
 'barefoot',
 'licence',
 'burthen',
 'deceit',
 'arrival',
 'vicar',
 '

In [9]:
tot = 0
for x in IRT.calculate(path): tot += x[2]
tot

178

In [10]:
seen = set()
dup = set()
for x in path:
    if x in seen:
        dup.add(x)
    seen.add(x)
    
for x in dup:
    print(x)
        

grease
eate
twig
mickle
perdition
trap
contention
grudge
dunghill
celerity
aqua
cinna
practis
impatience
helicane
admittance
troyan
beseeming
homage
hum
quietness
ides
shap
suburb
travell
damage
pill
parthia
mann
wardrobe
cattle
armor
prophetess
arrogance
appointment
divinity
discreet
proclamation
family
cine
shuns
bitterness
recoil
immodest
fortitude
precept
sandy
founder
co
persuades
scatt
scotch
loathes
ravenspurgh
doctrine
smelling
thames
delivers
quiver
confines
valerius
environ
smack
sequent
reconcile
tereus
reveal
francisco
huswife
annoyance
lodowick
montagues
declin
vulture
chastise
refrain
shackle
fresher
ague
chivalry
beck
quillets
taunt
newes
incorporate
rout
enlargement
potion
griev
suppos
speechless
nerve
sleeper
loo
consist
forbids
supple
mi
control
card
unclasp
poland
caparison
swagger
saide
pinion
lecture
gaudy
excels
thunderbolt
tongu
dealt
restrain
cramp
comforter
giv
batter
blanket
cricket
miranda
penalty
epithet
limitation
porpentine
cambio
gesture
discomfort
attorn

In [11]:
'juliet' in shakespeare.nouns

False

In [12]:
'juliet' in shakespeare.adjectives

True

In [13]:
wtr = RandomWalker(network.graph, start='romeo')
path = wtr.run(100)
path

['romeo',
 'tybalt',
 'imogen',
 'cordelia',
 'beadle',
 'donalbain',
 'servingman',
 'dowager',
 'wormwood',
 'shone',
 'appliance',
 'pridge',
 'tam',
 'beguil',
 'conduit',
 'parasite',
 'spout',
 'surety',
 'lubber',
 'mightier',
 'holder',
 'accuser',
 'allegiance',
 'mistaken',
 'chill',
 'mislike',
 'ewe',
 'allowance',
 'aqua',
 'stillness',
 'pupil',
 'nerve',
 'bribe',
 'emulation',
 'devours',
 'bath',
 'diet',
 'provision',
 'zounds',
 'recreation',
 'grudge',
 'proclamation',
 'unity',
 'ace',
 'misty',
 'piercing',
 'blanket',
 'albion',
 'tyre',
 'morton',
 'jourdain',
 'cato',
 'aeneas',
 'antigonus',
 'cleomenes',
 'mace',
 'clout',
 'stirrup',
 'libertine',
 'roof',
 'complement',
 'craft',
 'price',
 'exception',
 'dispos',
 'advanc',
 'mix',
 'preserv',
 'heav',
 'diet',
 'clasp',
 'pageant',
 'weather',
 'competitor',
 'subjection',
 'material',
 'sufferance',
 'imprisonment',
 'humanity',
 'rack',
 'presumption',
 'par',
 'hoard',
 'lath',
 'lawless',
 'reproach',