22Random Markov Text Generator
33
44This program generates pseudo-random text using a Markov chain built from
5- one of several embedded corpora. It produces structured output in the
6- form of paragraphs and sentences, with configurable parameters for:
7-
8- - Markov order (n-gram size)
9- - Words per sentence
10- - Sentences per paragraph
11- - Paragraph count
12- - Optional seed phrases and RNG seed
13- - Optional corpus selection
14-
15- Features:
16-
17- - Five built-in seed phrases, randomly chosen if no seed is provided
18- - Paragraphs and sentences with ±20% variability in lengths
19- - Automatic reseeding from corpus if seed phrases do not exist in the model
20- - Fully self-contained; no external corpus files required
21- - Can be run with no parameters and produces readable, multi-paragraph text
5+ one of several embedded corpora in the strings module. It produces structured output in the
6+ form of paragraphs and sentences, with configurable parameters.
227
238Usage:
249
@@ -29,72 +14,11 @@ Example:
2914
3015 ./lorem -order 2 -words 12 -sentences 4 -paragraphs 3 -corpus poe
3116*/
32- import rand
17+ import strings.lorem
3318import flag
3419import os
35-
36- // ---------------- Embedded Corpora ----------------
37-
38- const corpora = {
39- 'lorem' : lorem_corpus
40- 'poe' : poe_corpus
41- 'darwin' : darwin_corpus
42- 'bard' : shakespeare_corpus
43- }
44-
45- const lorem_corpus = '
46- lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor
47- incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis
48- nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat
49- Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore
50- eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt
51- in culpa qui officia deserunt mollit anim id est laborum
52- '
53-
54- const poe_corpus = '
55- once upon a midnight dreary while I pondered weak and weary
56- over many a quaint and curious volume of forgotten lore
57- while I nodded nearly napping suddenly there came a tapping
58- as of someone gently rapping at my chamber door
59- '
60-
61- const darwin_corpus = '
62- when we look to the individuals of the same variety or sub variety of our
63- older cultivated plants and animals one of the first points which strikes
64- us is that they generally differ much more from each other than do the
65- individuals of any one species or variety in a state of nature
66- '
67-
68- const shakespeare_corpus = '
69- to be or not to be that is the question
70- all the worlds a stage and all the men and women merely players
71- the lady doth protest too much methinks
72- a rose by any other name would smell as sweet
73- et tu brute
74- if music be the food of love play on
75- now is the winter of our discontent
76- we are such stuff as dreams are made on
77- brevity is the soul of wit
78- some are born great some achieve greatness and some have greatness thrust upon them
79- cry havoc and let slip the dogs of war
80- all that glisters is not gold
81- the fault dear brutus is not in our stars but in ourselves
82- to thine own self be true
83- lord what fools these mortals be
84- shall i compare thee to a summers day
85- '
86-
87- // ---------------- Seed Phrases ----------------
88-
89- const seed_phrases = [
90- 'in the beginning' ,
91- 'once upon a time' ,
92- 'it was the first' ,
93- 'when we consider' ,
94- 'there was a moment' ,
95- ]
96-
97- // ---------------- Main ----------------
20+ import rand
21+ import time
9822
9923fn main () {
10024 mut fp := flag.new_flag_parser (os.args[1 ..])
@@ -106,17 +30,23 @@ fn main() {
10630 words_per_sentence := fp.int ('words' , `w` , 10 , 'Words per sentence [default: 10]' )
10731 sentences_per_paragraph := fp.int ('sentences' , `s` , 5 , 'Sentences per paragraph [default: 5]' )
10832 paragraphs := fp.int ('paragraphs' , `p` , 3 , 'Paragraph count [default: 3]' )
109- corpus_name := fp.string ('corpus' , `c` , 'lorem' , 'Corpus name (lorem, poe, darwin, bard) [default: lorem' )
33+ corpus_name := fp.string ('corpus' , `c` , 'lorem' , 'Corpus name (lorem, poe, darwin, bard) [default: lorem] ' )
11034 seed_text := fp.string ('seed' , `S` , '' , 'Seed phrase (random if omitted)' )
111- rng_seed := fp.int ('rngseed' , `r` , 0 , 'RNG seed (0 = non-deterministic )' )
35+ mut rng_seed := fp.int ('rngseed' , `r` , 0 , 'RNG seed (0 = random )' )
11236
11337 fp.finalize () or {
11438 eprintln (err)
11539 return
11640 }
11741
118- text := generate_text (
119- order: order
42+ if rng_seed == 0 {
43+ t := time.now ().unix_milli ()
44+ rand.seed ([u32 (t), u32 (t >> 32 )])
45+ rng_seed = rand.int ()
46+ }
47+
48+ text := lorem.generate (
49+ markov_order: order
12050 words_per_sentence: words_per_sentence
12151 sentences_per_paragraph: sentences_per_paragraph
12252 paragraphs: paragraphs
@@ -127,127 +57,3 @@ fn main() {
12757
12858 println (text)
12959}
130-
// LoremCfg bundles every knob accepted by generate_text.
// Fields without an explicit default start at V's zero value
// ('' for strings, 0 for ints).
struct LoremCfg {
	// Number of preceding words that form one Markov state.
	order int = 2
	// Base sentence length in words, before random variation is applied.
	words_per_sentence int = 10
	// Base paragraph length in sentences, before random variation is applied.
	sentences_per_paragraph int = 5
	// Number of paragraphs to emit.
	paragraphs int = 3
	// Key into the embedded corpus table; '' lets select_corpus pick one at random.
	corpus_name string
	// Starting phrase; '' lets generate_text pick one of the built-in phrases.
	seed_text string
	// RNG seed; 0 leaves the global generator untouched.
	rng_seed int
}
140-
141- // ---------------- Text Generation ----------------
142-
// generate_text produces `cfg.paragraphs` paragraphs of pseudo-random text
// from a Markov chain of order `cfg.order` built over the selected corpus.
//
// Parameters (all taken from `cfg`):
//   order                    - words per Markov state; must be >= 1
//   words_per_sentence       - base sentence length, randomised through vary()
//   sentences_per_paragraph  - base paragraph length, randomised through vary()
//   paragraphs               - number of paragraphs
//   corpus_name              - corpus key handed to select_corpus()
//   seed_text                - starting phrase; a built-in phrase is used when empty
//   rng_seed                 - when non-zero, reseeds the global RNG first
//
// Returns the generated text, or '' (after printing a message to stderr)
// when the order is below 1 or the corpus has too few words for the order.
fn generate_text(cfg LoremCfg) string {
	// The state drops its oldest word on every step, so it must hold at
	// least one word; an order below 1 would end up slicing an empty array.
	if cfg.order < 1 {
		eprintln('markov order must be at least 1')
		return ''
	}

	if cfg.rng_seed != 0 {
		// Seed with two 32-bit words, matching the seeding done elsewhere in
		// this file; a single-word seed is rejected by the default generator.
		rand.seed([u32(cfg.rng_seed), u32(0)])
	}

	seed := if cfg.seed_text != '' { cfg.seed_text } else { random_seed_phrase() }

	corpus := select_corpus(cfg.corpus_name)
	tokens := tokenize(corpus)

	if tokens.len <= cfg.order {
		eprintln('corpus too small for selected order')
		return ''
	}

	model := build_markov(tokens, cfg.order)

	// Must be the same separator that build_markov uses when it joins a key.
	sep := '\u0001 '

	mut state := tokenize(seed)
	if state.len > cfg.order {
		// Only the trailing `order` words can ever match a model key; keeping
		// the whole phrase would guarantee a miss and discard the seed.
		state = state[state.len - cfg.order..].clone()
	} else if state.len < cfg.order {
		start := rand.intn(tokens.len - cfg.order) or { 0 }
		state = tokens[start..start + cfg.order].clone()
	}

	mut out := []string{}

	for pi in 0 .. cfg.paragraphs {
		if pi != 0 {
			out << '\n\n '
		}
		sentences := vary(cfg.sentences_per_paragraph, 1)

		for si in 0 .. sentences {
			if si != 0 {
				out << ' '
			}
			words := vary(cfg.words_per_sentence, 3)
			mut sentence := []string{}

			for _ in 0 .. words {
				mut nexts := model[state.join(sep)] or { []string{} }
				if nexts.len == 0 {
					// Unknown state: restart from a random corpus position and
					// retry at once, so the sentence does not lose a word.
					// `start` is always below tokens.len - order, so the new
					// state is followed by at least one word in the corpus.
					start := rand.intn(tokens.len - cfg.order) or { 0 }
					state = tokens[start..start + cfg.order].clone()
					nexts = model[state.join(sep)] or { []string{} }
					if nexts.len == 0 {
						continue
					}
				}

				next := nexts[rand.intn(nexts.len) or { 0 }]
				sentence << next

				// Slide the window: drop the oldest word, append the new one.
				state = state[1..].clone()
				state << next
			}

			if sentence.len > 0 {
				out << sentence.join(' ').capitalize()
				out << '.'
			}
		}
	}

	return out.join('')
}
208-
209- // ---------------- Utilities ----------------
210-
// vary returns `base` shifted by a uniformly random offset of up to 20% of
// `base` in either direction, and never less than `min`.
//
// When 20% of `base` truncates to zero (or `base` is not positive) no random
// offset is applied, but the `min` floor is still enforced. Previously that
// path returned `base` unclamped, so a result below `min` was possible.
fn vary(base int, min int) int {
	delta := int(f32(base) * 0.2)
	mut val := base
	if delta > 0 {
		// roll is in [0, 2*delta]; subtracting delta centres it on zero.
		roll := rand.intn(delta * 2 + 1) or { delta }
		val = base + roll - delta
	}
	return if val < min { min } else { val }
}
220-
// select_corpus resolves a corpus name to its text.
// An empty name picks one of the registered corpora at random.
// An unknown name prints a message to stderr and terminates the process
// with exit code 1.
fn select_corpus(name string) string {
	if name == '' {
		names := corpora.keys()
		pick := rand.intn(names.len) or { 0 }
		return corpora[names[pick]]
	}

	return corpora[name] or {
		eprintln('unknown corpus: ${name} ')
		exit(1)
	}
}
234-
// random_seed_phrase returns one entry of `seed_phrases`, chosen uniformly
// at random (the first entry if the RNG call fails).
fn random_seed_phrase() string {
	idx := rand.intn(seed_phrases.len) or { 0 }
	return seed_phrases[idx]
}
238-
// tokenize splits `text` into words: the listed line-break and tab sequences
// are first replaced by a single space, the result is split on spaces, and
// empty pieces (produced by runs of spaces) are discarded.
fn tokenize(text string) []string {
	flattened := text.replace_each(['\n ', ' ', '\t ', ' '])
	mut words := []string{}
	for piece in flattened.split(' ') {
		if piece.len > 0 {
			words << piece
		}
	}
	return words
}
245-
// build_markov maps every run of `order` consecutive tokens (joined into a
// single string key) to the list of tokens observed immediately after that
// run. A follower appears once per occurrence, so repeated continuations
// carry proportionally more weight when one is later picked at random.
// The result is empty when there are not more than `order` tokens.
fn build_markov(tokens []string, order int) map[string][]string {
	mut table := map[string][]string{}
	last := tokens.len - order
	mut pos := 0
	for pos < last {
		window := tokens[pos..pos + order]
		table[window.join('\u0001 ')] << tokens[pos + order]
		pos++
	}
	return table
}
0 commit comments