Skip to content

Commit 7677d76

Browse files
authored
examples,strings: add a new strings.lorem module; use it in the Markov chain text generator example (#26387)
1 parent 394abc1 commit 7677d76

File tree

3 files changed

+302
-209
lines changed

3 files changed

+302
-209
lines changed

examples/lorem.v

Lines changed: 15 additions & 209 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,8 @@
22
Random Markov Text Generator
33
44
This program generates pseudo-random text using a Markov chain built from
5-
one of several embedded corpora. It produces structured output in the
6-
form of paragraphs and sentences, with configurable parameters for:
7-
8-
- Markov order (n-gram size)
9-
- Words per sentence
10-
- Sentences per paragraph
11-
- Paragraph count
12-
- Optional seed phrases and RNG seed
13-
- Optional corpus selection
14-
15-
Features:
16-
17-
- Five built-in seed phrases, randomly chosen if no seed is provided
18-
- Paragraphs and sentences with ±20% variability in lengths
19-
- Automatic reseeding from corpus if seed phrases do not exist in the model
20-
- Fully self-contained; no external corpus files required
21-
- Can be run with no parameters and produces readable, multi-paragraph text
5+
one of several embedded corpora in the strings.lorem module. It produces structured output in the
6+
form of paragraphs and sentences, with configurable parameters.
227
238
Usage:
249
@@ -29,72 +14,11 @@ Example:
2914
3015
./lorem -order 2 -words 12 -sentences 4 -paragraphs 3 -corpus poe
3116
*/
32-
import rand
17+
import strings.lorem
3318
import flag
3419
import os
35-
36-
// ---------------- Embedded Corpora ----------------
37-
38-
const corpora = {
39-
'lorem': lorem_corpus
40-
'poe': poe_corpus
41-
'darwin': darwin_corpus
42-
'bard': shakespeare_corpus
43-
}
44-
45-
const lorem_corpus = '
46-
lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor
47-
incididunt ut labore et dolore magna aliqua Ut enim ad minim veniam quis
48-
nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat
49-
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore
50-
eu fugiat nulla pariatur Excepteur sint occaecat cupidatat non proident sunt
51-
in culpa qui officia deserunt mollit anim id est laborum
52-
'
53-
54-
const poe_corpus = '
55-
once upon a midnight dreary while I pondered weak and weary
56-
over many a quaint and curious volume of forgotten lore
57-
while I nodded nearly napping suddenly there came a tapping
58-
as of someone gently rapping at my chamber door
59-
'
60-
61-
const darwin_corpus = '
62-
when we look to the individuals of the same variety or sub variety of our
63-
older cultivated plants and animals one of the first points which strikes
64-
us is that they generally differ much more from each other than do the
65-
individuals of any one species or variety in a state of nature
66-
'
67-
68-
const shakespeare_corpus = '
69-
to be or not to be that is the question
70-
all the worlds a stage and all the men and women merely players
71-
the lady doth protest too much methinks
72-
a rose by any other name would smell as sweet
73-
et tu brute
74-
if music be the food of love play on
75-
now is the winter of our discontent
76-
we are such stuff as dreams are made on
77-
brevity is the soul of wit
78-
some are born great some achieve greatness and some have greatness thrust upon them
79-
cry havoc and let slip the dogs of war
80-
all that glisters is not gold
81-
the fault dear brutus is not in our stars but in ourselves
82-
to thine own self be true
83-
lord what fools these mortals be
84-
shall i compare thee to a summers day
85-
'
86-
87-
// ---------------- Seed Phrases ----------------
88-
89-
const seed_phrases = [
90-
'in the beginning',
91-
'once upon a time',
92-
'it was the first',
93-
'when we consider',
94-
'there was a moment',
95-
]
96-
97-
// ---------------- Main ----------------
20+
import rand
21+
import time
9822

9923
fn main() {
10024
mut fp := flag.new_flag_parser(os.args[1..])
@@ -106,17 +30,23 @@ fn main() {
10630
words_per_sentence := fp.int('words', `w`, 10, 'Words per sentence [default: 10]')
10731
sentences_per_paragraph := fp.int('sentences', `s`, 5, 'Sentences per paragraph [default: 5]')
10832
paragraphs := fp.int('paragraphs', `p`, 3, 'Paragraph count [default: 3]')
109-
corpus_name := fp.string('corpus', `c`, 'lorem', 'Corpus name (lorem, poe, darwin, bard) [default: lorem')
33+
corpus_name := fp.string('corpus', `c`, 'lorem', 'Corpus name (lorem, poe, darwin, bard) [default: lorem]')
11034
seed_text := fp.string('seed', `S`, '', 'Seed phrase (random if omitted)')
111-
rng_seed := fp.int('rngseed', `r`, 0, 'RNG seed (0 = non-deterministic)')
35+
mut rng_seed := fp.int('rngseed', `r`, 0, 'RNG seed (0 = random)')
11236

11337
fp.finalize() or {
11438
eprintln(err)
11539
return
11640
}
11741

118-
text := generate_text(
119-
order: order
42+
if rng_seed == 0 {
43+
t := time.now().unix_milli()
44+
rand.seed([u32(t), u32(t >> 32)])
45+
rng_seed = rand.int()
46+
}
47+
48+
text := lorem.generate(
49+
markov_order: order
12050
words_per_sentence: words_per_sentence
12151
sentences_per_paragraph: sentences_per_paragraph
12252
paragraphs: paragraphs
@@ -127,127 +57,3 @@ fn main() {
12757

12858
println(text)
12959
}
130-
131-
struct LoremCfg {
132-
order int = 2
133-
words_per_sentence int = 10
134-
sentences_per_paragraph int = 5
135-
paragraphs int = 3
136-
corpus_name string
137-
seed_text string
138-
rng_seed int
139-
}
140-
141-
// ---------------- Text Generation ----------------
142-
143-
fn generate_text(cfg LoremCfg) string {
144-
if cfg.rng_seed != 0 {
145-
rand.seed([u32(cfg.rng_seed)])
146-
}
147-
148-
seed := match cfg.seed_text != '' {
149-
true { cfg.seed_text }
150-
else { random_seed_phrase() }
151-
}
152-
153-
corpus := select_corpus(cfg.corpus_name)
154-
tokens := tokenize(corpus)
155-
156-
if tokens.len <= cfg.order {
157-
eprintln('corpus too small for selected order')
158-
return ''
159-
}
160-
161-
model := build_markov(tokens, cfg.order)
162-
163-
mut state := tokenize(seed)
164-
if state.len < cfg.order {
165-
start := rand.intn(tokens.len - cfg.order) or { 0 }
166-
state = tokens[start..start + cfg.order].clone()
167-
}
168-
169-
mut out := []string{}
170-
171-
for pi in 0 .. cfg.paragraphs {
172-
if pi != 0 {
173-
out << '\n\n'
174-
}
175-
sentences := vary(cfg.sentences_per_paragraph, 1)
176-
177-
for si in 0 .. sentences {
178-
if si != 0 {
179-
out << ' '
180-
}
181-
words := vary(cfg.words_per_sentence, 3)
182-
mut sentence := []string{}
183-
184-
for _ in 0 .. words {
185-
key := state.join('\u0001')
186-
nexts := model[key] or {
187-
start := rand.intn(tokens.len - cfg.order) or { 0 }
188-
state = tokens[start..start + cfg.order].clone()
189-
continue
190-
}
191-
192-
next := nexts[rand.intn(nexts.len) or { 0 }]
193-
sentence << next
194-
195-
state = state[1..].clone()
196-
state << next
197-
}
198-
199-
if sentence.len > 0 {
200-
out << sentence.join(' ').capitalize()
201-
out << '.'
202-
}
203-
}
204-
}
205-
206-
return out.join('')
207-
}
208-
209-
// ---------------- Utilities ----------------
210-
211-
fn vary(base int, min int) int {
212-
delta := int(f32(base) * 0.2)
213-
if delta == 0 {
214-
return base
215-
}
216-
offset := rand.intn(delta * 2 + 1) or { 0 } - delta
217-
val := base + offset
218-
return if val < min { min } else { val }
219-
}
220-
221-
fn select_corpus(name string) string {
222-
if name != '' {
223-
if corpus := corpora[name] {
224-
return corpus
225-
}
226-
eprintln('unknown corpus: ${name}')
227-
exit(1)
228-
}
229-
230-
keys := corpora.keys()
231-
key := keys[rand.intn(keys.len) or { 0 }]
232-
return corpora[key]
233-
}
234-
235-
fn random_seed_phrase() string {
236-
return seed_phrases[rand.intn(seed_phrases.len) or { 0 }]
237-
}
238-
239-
fn tokenize(text string) []string {
240-
return text
241-
.replace_each(['\n', ' ', '\t', ' '])
242-
.split(' ')
243-
.filter(it.len > 0)
244-
}
245-
246-
fn build_markov(tokens []string, order int) map[string][]string {
247-
mut model := map[string][]string{}
248-
for i in 0 .. tokens.len - order {
249-
key := tokens[i..i + order].join('\u0001')
250-
model[key] << tokens[i + order]
251-
}
252-
return model
253-
}

0 commit comments

Comments
 (0)