/
index.cgi
executable file
·533 lines (487 loc) · 18.4 KB
/
index.cgi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
#!/usr/bin/perl
#
# lexicon.cgi - online search engine for zrajm's klingon database
###############################################################################
## ##
## Initializations ##
## ##
###############################################################################
use warnings;
use strict;
use utf8;
use CGI qw(:standard);
use Encode 'decode';
binmode(STDIN, ":encoding(utf8)");
binmode(STDOUT, ":encoding(utf8)");
$SIG{__DIE__} = sub {
my ($msg) = @_;
print '<h2>ERROR: Program halted</h2>',
'<ul>', (
map {
'<li>' . escapeHTML($_) . '</li>'
} split(/\n/, $msg)
), '</ul>';
exit;
};
# output content-type header
# (when used as SSI or loaded explicitly by
# browser, but not when called by other script)
if (not $ENV{X_CGI}) { # if not suppressed
print header(-charset=>'utf-8'); # Content-type header
$ENV{X_CGI} = "perl"; # and suppress it from now on
} #
sub get_source_names {
# read dir
my @file = do {
opendir(my $dh, "../canon")
or die "Can't open current dir: $!";
grep /^\d.*\.txt$/, readdir($dh);
};
# abbr contains source_abbrev => file
my %file = map { # for each transcript file
my $file = $_;
s/\.txt$//; # strip '.txt'
if (not /-(news|email|qepa|web)$/) { # unless email/news/web/qep'a'
s/^(\d+-)+//; # strip leading date
}
($_ => $file);
} @file;
$file{tkda} = $file{tkd}; # add for TKDa
return %file;
}
sub link_sources {
my @source = @_;
return join "; ", map {
s/^\s+//;
s/\s+$//;
link_source($_);
} split_sources(@source);
}
{
my %file;
sub link_source {
local ($_) = @_;
if (not %file) { %file = get_source_names() };
my ($abbr, $rest) = /^(\S*)(.*)/; # 1st word of source + rest
# process secondary sources
my $has_secondary_source = do {
$rest =~ s{\((.*)\)}{
"(" . link_sources($1) . ")";
}e ? 1 : 0;
};
# downcase + remove apostrophes and HTML
# ('cuz HTML <mark> tags are inserted by query)
(my $lcabbr = lc $abbr) =~ s/(?:<.*?>|')//g;
my $file = $file{$lcabbr} || ""; # transcript file name
if (not $file) { return "$abbr$rest" } # fallback (should never happen)
if ($has_secondary_source) {
return '<a href="../canon/'.$file.'">'.$abbr.'</a>'.$rest;
} else {
return '<a href="../canon/'.$file.'">'.$abbr.$rest.'</a>';
}
}
}
# split string on semicolon, returns list with result
# (semicolons inside parentheses are ignored when splitting)
sub split_sources {
my ($str) = @_;
return ("$str;" =~ /(.*?(?:\([^)]*\)[^;(]*)*);/g);
}
sub link_last_source {
my ($cite) = @_;
$cite =~ s{(.*)\[(.*?)\]$}{
$1 . "[" . link_source($2) . "]";
}e;
return $cite;
}
our %postprocess = (
def => \&link_sources,
ref => \&link_sources,
cite => \&link_last_source, # link sources inside last [...]
desc => \&link_last_source, # link sources inside last [...]
);
our %field = (
tlh => "Klingon word",
warn => "Warning",
pos => "Part-of-speech",
sv => "Swedish translation",
en => "English translation",
desc => "Description",
def => "Defining source",
ref => "Referring source",
com => "Comment",
pun => "Pun",
see => "See also",
tag => "Tags",
data => "Data",
id => "Permanent Entry ID",
meta => "Metadata",
);
our @tips = (
'Hover over a field in search results to see field description.',
'Use a search prefix (e.g. <a href="?q=tag:klcp1"><b>tag:klcp1</b></a>) ' .
'to search only a given field (here: <b>tag:</b>).',
'Use <a href="?q=def:holqed-10-4"><b>def:holqed-10-4</b></a> to find all words ' .
'first occurring in <i><a href="../canon/2001-12-holqed-10-4.txt">' .
'<b lang=tlh>HolQeD</b> issue 10:4</a>.</i>',
'Prefixes: <b>tlh:</b> = Klingon, <b>en:</b> = English, <b>sv:</b> = ' .
'Swedish, <b>pos:</b> = part-of-speech.',
'Prefixes: <b>com:</b> = comment, <b>def:</b> = defining source, ' .
'<b>ref:</b> = referring source.',
'Use <a href="?q=tlh:*chuq"><b>tlh:*chuq</b></a> to finds all Klingon ' .
'words ending in <b>chuq.</b>',
'Use <a href="?q=tag:klcp1"><b>tag:klcp1</b></a> to find all ' .
'<a href="../klcp.html#_6">first level words</a> from the<br>' .
'<a href="../klcp.html"><i>Klingon Language Certification Program' .
'</i></a>.',
'Use <a href="?q=def:kgt"><b>def:kgt</b></a> to find all words first ' .
'occurring in <i><a href="../canon/1997-11-01-kgt.txt">KGT</a>.</i>',
'Put <b>tlh:</b> before a word to search only Klingon fields for it.',
'Put <b>en:</b> before a word to search only English fields for it.',
'Put <b>sv:</b> before a word to search only Swedish fields for it.',
'Use <b>pos:v</b> to search for <i>verbs</i> (see <i>' .
'<a href="intro.html#pos">Introduction</a></i> for other word ' .
'types).</i>',
'Use <a href="?q=pos:ns2"><b>pos:ns2</b></a> to find <i>noun suffixes '.
'type 2</i> (also try numbers <i>1–5</i>).',
'Use <a href="?q=pos:vs1"><b>pos:vs1</b></a> to find <i>verb suffixes ' .
'type 1</i> (also try letter <i>r,</i> and numbers <i>1–9</i>).',
'Use <a href="?q=pos:vsr"><b>pos:vsr</b></a> or <a href="?q=pos:rover">' .
'<b>pos:rover</b></a> to search for <i>verb suffix rovers.</i>',
'Use <b>*</b> to mean any sequence of letters when searching.',
'Use quotes (<b>"…"</b>) to search for several words after each other.',
'With multiple search words, all are needed for a match (e.g. ' .
'<a href="?q=en:battle pos:v"><b>en:battle pos:v</b></a>).',
'Search is case insensitive, except with <b>tlh:</b> prefix.',
);
################################################################################
### ##
### Subroutines ##
### ##
################################################################################
sub read_dictionary {
my ($file) = @_;
open(my $fh, "<:encoding(utf8)", $file) or
die "cannot open dictionary file '$file'";
# skip file header
while (<$fh>) {
last if $_ eq "== start-of-data ==\n";
}
# read dictionary
my @buf = ();
while (<$fh>) {
chomp();
last if $_ =~ /^== end-of-data ==$/; # terminate at file footer
next if /^===\s/; # skip section delimiters
if (/^$/) { # beginning of new post
push(@buf, "");
next if /^$/; # skip to next line in file
} # if this one is empty now
# indented line, or new field
if (s/^\s+//) { # if line begins with white space
$buf[$#buf] .= " " . $_; # join it to buffer's last line
} else { # otherwise
$buf[$#buf] .= "\n" . $_; # add new line to buffer
}
}
close($fh);
return map { s/[<>«»]//g; $_ } @buf;
}
# Usage: @REGEX = split_query($QUERY);
#
# Takes a QUERY and splits it into phrases (a phrase is a word or a double
# quoted string).
#
# Returs a list of regexes for matching that query. A phrase may start with a
# field name (e.g. 'tlh:') to match only that field. If a phrase is quoted then
# the field name must occur before the quotes, i.e. 'tlh:"hej"' is interpreted
# the same as 'tlh:hej' (search field "tlh" for string "hej"), while
# '"tlh:hej"' is interpreted literally (search any field for string "tlh:hej").
#
# TODO: implement negative queries?
sub split_query {
my ($query) = @_;
# split query into words and quoted strings
my @subquery = $query =~ m/([^\s"]+(?:"[^"]+"?)?|"[^"]+"?)/g;
my %pos = (
v => "verb",
n => "noun",
name => "name",
adv => "adverbial",
conj => "conjunction",
excl => "exclamation",
num => "numeral",
pro => "pronoun",
ques => "question word",
ns1 => "noun suffix type 1",
ns2 => "noun suffix type 2",
ns3 => "noun suffix type 3",
ns4 => "noun suffix type 4",
ns5 => "noun suffix type 5",
vp => "verb prefix",
vsr => "verb suffix type rover",
vs1 => "verb suffix type 1",
vs2 => "verb suffix type 2",
vs3 => "verb suffix type 3",
vs4 => "verb suffix type 4",
vs5 => "verb suffix type 5",
vs6 => "verb suffix type 6",
vs7 => "verb suffix type 7",
vs8 => "verb suffix type 8",
vs9 => "verb suffix type 9",
number => "numeral",
rover => "verb suffix type rover",
);
# turn subqueries into regexes
my $w = qr/[\w']/; # word character class
return map {
# split subquery into field name & search phrase
my ($field, $phrase) = /^ (?:([^":]*):)? "?(.*?)"? $/x;
# quote metacharacters + "any field" if field was empty
$field = defined($field) ? quotemeta(lc($field)) : "[^:]*";
$phrase = quotemeta($phrase);
# all fields are case insensetive, except "tlh"
my $lcphrase = lc $phrase;
if ($field eq "pos" and exists($pos{$lcphrase})) {
qr/^($field:)\t($pos{$lcphrase})$/m;
} elsif ($field eq "id") {
my %chr = (0 => 'o', O => 'o', I => '1', l => '1');
for ($phrase) { s/([0OIl])/$chr{$1}/g; s/\\[*]/.*/g; }
qr/^($field:)\t($phrase)$/m; # case sensetive
} else {
$field = "tag" if $field eq "cat";
for ($phrase) { s/\\\*/$w*/g; s/\s+/\\s+/g; } # replace '*' and ' '
if ($field eq "tlh") {
qr/^($field:.*)(?<!$w)($phrase)(?!$w)/m; # case sensetive
} else {
qr/^($field:.*)(?<!$w)($phrase)(?!$w)/im; # case insensetive
}
}
} @subquery;
}
sub html_no_match {
my ($query) = @_;
return <<"EOF";
<p>Your search – <b>$query</b> – did not match any
dictionary entries.</p>
<p>Suggestions:</p>
<p>
<ul>
<li>Make sure all words are spelled correctly.
<li>Try different keywords.
<li>Try more general keywords.
</ul>
</p>
EOF
}
sub html_empty_page {
return <<"EOF";
<p>The book has a Klingon–English, and an English–Klingon side.
The wordlists are automatically generated from <a href="dict.zdb">a flat text
database</a>, which is human readable and easy to update. The database has
been continuously updated and improved since it was created in late
<time>1997</time>.</p>
<table class="noborder layout">
<tr>
<th colspan=2>Search Expressions
<tr>
<td class=center><b>"</b>…<b>"</b>
<td>search for a phrase (containing more than one word)
<tr>
<td class=center><b>*</b>
<td>matches any alphabetical character
<tr>
<td class=center><b>tlh:</b>…
<td>search Klingon words <i>(case sensetive)</i>
<tr>
<td class=center><b>en:</b>…
<td>search English translations
<tr>
<td class=center><b>sv:</b>…
<td>search Swedish translations
<tr>
<td class=center><b>pos:</b>…
<td>search part-of-speech field<br>(use abbrev
from <i><a href="intro.html">Introduction</a></i>, <i>ns#,</i> <i>vs#</i>
or free text)
<tr>
<td class=center><b>def:</b>…
<td>search defining sources
<tr>
<td class=center><b>ref:</b>…
<td>search referring sources
</table>
<p>Case <em>only</em> matters when you’re using the search prefix <b>tlh:</b>,
otherwise all searches are case insensetive.</p>
EOF
}
sub html_head {
return <<"EOF";
<!DOCTYPE html>
<!-- Copyright 1998–2022 by zrajm. License: CC BY-SA (text), GPLv2 (code) -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>Klingon Pocket Dictionary – Klingonska Akademien</title>
<link rel=stylesheet href="../includes/base.css">
<link rel=stylesheet href="../includes/dict.css">
<link rel=stylesheet href="../includes/pagetabs.css">
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
<link rel="manifest" href="/site.webmanifest">
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#bb3333">
<meta name="msapplication-TileColor" content="#bb3333">
<meta name="theme-color" content="#bb3333">
<link rel=canonical href="http://klingonska.org/dict/">
<style>
td, th { vertical-align: text-top; }
th {
font-weight: normal;
white-space: nowrap;
text-align: left;
}
mark { background-color: #bbb; font-weight: inherit; font-style: inherit; }
</style>
<header role=banner class=tabbed>
<!-- begin:status -->
<ul>
<li>
<nav itemprop=breadcrumb role=navigation>
<a href="http://klingonska.org/">Home</a> >
<a href="http://klingonska.org/dict/">Klingon Pocket Dictionary</a>
</nav>
<li>
Updated <time pubdate datetime="2022-07-10T12:49+02:00">July 10, 2022</time>
</ul>
<!-- end:status -->
<div>
<a href="../">
<table id=logotitle>
<td>
<span class=crop>
<img height=200 width=200 src="../pic/ka-logo.svg" alt="Klingonska Akademien">
</span>
<td>
<h1>Klingonska<span id=logospace> </span>Akademien</h1>
</table>
</a>
<nav class=pagetabs role=navigation>
<a href="about.html">About</a>
<a href="intro.html">Introduction</a>
<span class=selected>Lexicon</span>
<a href="suffix.html">Suffix Guide</a>
<a href="tables.html">Reference Tables</a>
</nav>
</div>
</header>
<article role=main itemprop=mainContentOfPage style="padding-top:1px">
<h1>Klingon Pocket Dictionary</h1>
<aside class=note>Some info + searchable version of the pocket dictionary database.</aside>
<section>
EOF
}
sub html_form {
my ($query) = (@_, "");
$query = escapeHTML($query);
(my $script_name = $0) =~ s#^.*/##;
my $tips = $tips[int(rand(@tips))];
return <<"EOF";
<table class="noborder layout">
<tr>
<td class=center>
<form class=dict method=get action=""
><input name=q autocomplete=off placeholder="Search dictionary…"
value="$query" autofocus
><button type=submit title="Search"
><img alt="Magnifying glass" src="../pic/magnify.svg"
></button
></form>
<tr>
<td class=center><small>$tips</small>
</table>
EOF
}
sub html_foot {
return <<'EOF';
</section>
</article>
<footer role=contentinfo>
<p class=copyright>© <time itemprop=copyrightYear>1998</time>–<time>2022</time> by
<a href="mailto:zrajm@klingonska.org" rel=author itemprop=author>zrajm</a>,
<a href="http://klingonska.org/" itemprop=sourceOrganization>Klingonska Akademien</a>, Uppsala
<p>License: <a href="http://creativecommons.org/licenses/by-sa/3.0/" rel=license>CC BY-SA</a>
</footer>
<script src="../includes/titlewrap.js"></script>
<!--[eof]-->
EOF
}
###############################################################################
## ##
## Main Program ##
## ##
###############################################################################
# no query: display empty search form & exit
if (not param("q")) {
print
html_head() .
html_form() .
html_empty_page() .
html_foot();
exit 0;
}
# read query from HTML form value
my $query = param("q"); # read form "q" value
$query = "" unless defined($query); # make sure it's never undef
$query = decode("UTF-8", $query); # UTF-8 decode it
print html_head() . html_form($query);
{
# make list of page content
my @regex = split_query($query);
my @output = ();
my $matches = 0;
WORD: foreach (read_dictionary("dict.zdb")) {
# highlight matching words
foreach my $regex (@regex) {
s#$regex#$1<mark>$2</mark>#m or next WORD;
}
push @output, ' <tr><td colspan=2> ' . "\n"
if $matches > 0;
$matches ++;
# presentation
s{ \{ (.*?) \} }{ # boldify
my $tlh = $1;
$tlh =~ s/'/’/g; # typographical quotes
'<b lang="tlh">' . $tlh . '</b>';
}gex;
s#~(.*?)~#<i>$1</i>#g; # apply italics
s#(.*)¿\?(.*)#$1$2 (uncertain translation)#g;
s/^\n//;
foreach (split(/\n/, $_)) {
my ($field, @content) = split(/:/, $_, 2);
s/^\s+//, s/\s+$// foreach @content;
if (exists($postprocess{$field})) {
@content = &{$postprocess{$field}}(@content);
next unless @content;
}
push @output,
" <tr title=\"" .
(exists $field{$field} ? $field{$field} : ucfirst $field) .
" (search prefix “$field:”)\">\n",
" <th class=right><span class=light>$field:</span>\n",
" <td>@content";
}
}
# output page
if ($matches == 0) {
print html_no_match($query);
} else {
print '<table class="noborder layout">' . "\n";
print ' <tr><td colspan=2>' . $matches . ' match' . ($matches == 1 ? "" : "es") . ".\n";
print " (Want the whole dictionary? Download it <i><a href=\"dict.zdb\">here</a></i>.)\n" if $matches > 100;
print ' <tr><td colspan=2> ' . "\n";
print @output;
print "</table>\n\n";
}
}
print html_foot();
#[eof]