-
Notifications
You must be signed in to change notification settings - Fork 41
/
markov_sentence_generator.rb
113 lines (104 loc) · 3.57 KB
/
markov_sentence_generator.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Messing about with the NullObject pattern, can't apply it in too many
# places in this one. Need to evaluate what else could be used in this
# aside from my first instinct of defaulting to []
# Catch-all stand-in object: it reports itself as nil, answers any
# undefined message with itself, and offers inert versions of the few
# conversion/append methods defined below.
# @private
class NullObject # :nodoc:
  # Any message without an explicit definition is answered with the
  # receiver itself, so arbitrary call chains keep returning this object.
  def method_missing(*_arguments, &_blk)
    self
  end

  # Lets callers detect the stand-in with an ordinary +nil?+ check.
  def nil?
    true
  end

  # Appending is accepted with any arguments, does nothing, and yields nil.
  def <<(*)
    nil
  end

  # Explicitly defined so the message does not reach method_missing;
  # yields nil rather than a String.
  def to_str
    nil
  end

  # Implicit array conversion yields a new empty Array.
  def to_ary
    []
  end
end
# Shared NullObject instance. MarkovSentenceGenerator#weighted_random passes
# it to Hash#fetch as the fallback value for keys the dictionary lacks.
# @private
NULL_OBJECT = NullObject.new # :nodoc:
# Raised by the sentence generator when text is requested while the
# dictionary holds no entries.
#
# Inherits from StandardError (not Exception) so that an ordinary `rescue`
# clause catches it; `rescue EmptyDictionaryError` and `rescue Exception`
# keep working as before.
# @private
class EmptyDictionaryError < StandardError # :nodoc:
end
# Builds pseudo-random text by repeatedly looking up the most recent words
# in a Markov dictionary and sampling one of the recorded follow-up words.
#
# The object passed to the constructor must respond to:
# * +dictionary+        - a Hash-like object (+keys+, +fetch+, +empty?+) whose
#                         values respond to +sample+;
# * +depth+             - how many trailing words form a lookup key;
# * +capitalized_words+ - a collection responding to +sample+, used to
#                         start sentences.
# @private
class MarkovSentenceGenerator # :nodoc:
  # Upper bound used by #generate_sentence when sizing a single sentence.
  SENTENCE_WORD_LIMIT = 30

  # @param dictionary the dictionary object described in the class comment.
  def initialize(dictionary)
    @dictionary = dictionary
    @depth = @dictionary.depth
  end

  # Returns a random key of the dictionary, exactly as it is stored.
  # NOTE(review): lookups in this class use arrays of up to +depth+ words as
  # keys, so the returned key presumably has that shape - confirm against
  # the dictionary class.
  #
  # @return [Object, nil] a random dictionary key, or nil when there are none.
  def random_word
    @dictionary.dictionary.keys.sample
  end

  # Picks a random entry from the dictionary's list of capitalized words.
  #
  # @return [Object, nil] a random capitalized entry, or nil when the list is empty.
  def random_capitalized_word
    @dictionary.capitalized_words.sample
  end

  # Returns a word based upon the likelihood of it appearing after the
  # supplied key: one element is sampled from the follow-up list stored
  # under +lastword+.
  #
  # When the dictionary has no entry for +lastword+ (a dead end), the lookup
  # falls back to NULL_OBJECT and the result of calling +sample+ on it is
  # returned; callers detect that case with +nil?+.
  #
  # @param [Array<String>] lastword the trailing words to use as lookup key.
  # @return [Object] a follow-up word, or the dead-end result described above.
  def weighted_random(lastword)
    @dictionary.dictionary.fetch(lastword, NULL_OBJECT).sample
  end

  # Tells whether +word+ should be glued onto the previous word as
  # punctuation: true-ish when it contains '!' or '?', or is exactly '.'.
  #
  # @param [String, nil] word the candidate token.
  # @return [Integer, Boolean] a truthy value for punctuation, false otherwise.
  def punctuation?(word)
    # nil and the dead-end sentinel (whose nil? is true) are never
    # punctuation. Without this guard the sentinel would receive =~, which
    # is not defined on plain objects in newer Rubies and would therefore
    # be answered by its catch-all method_missing with a truthy value.
    return false if word.nil?

    word =~ /[!?]/ || word == '.'
  end

  # Generates text of (wordcount) words using the weighted_random function.
  #
  # @param [Integer] wordcount The number of words you want the generated string to contain.
  # @return [String] the words, hopefully forming sentences, joined by spaces.
  # @raise [EmptyDictionaryError] if the dictionary has no entries.
  def generate(wordcount)
    raise_if_dictionary_empty

    sentence = [random_capitalized_word]
    (wordcount - 1).times do
      word = weighted_random(sentence.last(@depth))
      if word.nil?
        # Dead end: nothing is known to follow, so restart with a fresh word.
        sentence.push(random_capitalized_word)
      elsif punctuation?(word)
        # Attach punctuation to the previous word (dup: never mutate strings
        # owned by the dictionary), then begin a new sentence.
        sentence[-1] = sentence.last.dup << word
        sentence.push(random_capitalized_word)
      else
        sentence << word
      end
    end
    # Trims the seed word again when wordcount is zero or negative.
    sentence.pop(sentence.length - wordcount)
    sentence.join(' ')
  end

  # Generates (sentencecount) sentences using the weighted_random function.
  #
  # Each sentence starts with a random capitalized word and grows until its
  # last word ends in punctuation, the length cap is exceeded, or a dead end
  # is reached (no known follow-up word), in which case the sentence simply
  # stops there without punctuation.
  #
  # @param [Integer] sentencecount The number of sentences you want the generated string to contain.
  # @return [String] the sentence(s) generated, joined by spaces.
  # @raise [EmptyDictionaryError] if the dictionary has no entries.
  def generate_sentence(sentencecount)
    raise_if_dictionary_empty

    sentence = []
    # Small dictionaries get a cap of (number of keys + 5); larger ones use
    # the fixed limit.
    key_count = @dictionary.dictionary.keys.length
    maximum_length = key_count < SENTENCE_WORD_LIMIT ? key_count + 5 : SENTENCE_WORD_LIMIT

    sentencecount.times do
      wordcount = 0
      sentence.push(random_capitalized_word)
      # sentence.last[-1] is the final character of the most recent word.
      until punctuation?(sentence.last[-1]) || wordcount > maximum_length
        wordcount += 1
        word = weighted_random(sentence.last(@depth))
        # Dead end: stop this sentence instead of storing a placeholder
        # object among the words.
        break if word.nil?

        if punctuation?(word)
          sentence[-1] = sentence.last.dup << word
        else
          sentence << word
        end
      end
    end
    sentence.join(' ')
  end

  private

  # Raises EmptyDictionaryError when the dictionary contains no entries.
  def raise_if_dictionary_empty
    return unless @dictionary.dictionary.empty?

    raise EmptyDictionaryError, "The dictionary is empty! Parse a source file/string!"
  end
end