Permalink
Browse files

[scripts] script updates for new release

Signed-off-by: Christopher Hall <hsw@openmoko.com>
  • Loading branch information...
1 parent 226f606 commit 397d26dae347c92acf598e71f518bcead371b3ec @hxw hxw committed Apr 17, 2013
Showing with 278 additions and 112 deletions.
  1. +1 −0 XML-Licenses/en/wiki.nls
  2. +1 −0 XML-Licenses/wiki.inf
  3. +109 −0 scripts/BibleLinks
  4. +32 −11 scripts/GenerateIndex
  5. +6 −2 scripts/Run
  6. +125 −96 scripts/pack
  7. +4 −3 scripts/wiki-names.sh
View
@@ -22,6 +22,7 @@ quote:wiki_name=Quotes
starw:wiki_name=Wookieepedia
trav:wiki_name=WikiTravel
hitch:wiki_name=Hitchwiki
+kjv:wiki_name=King James Bible
license_text=Text is available under the Creative Commons Attribution/Share-Alike License and can be freely reused under the terms of that license. See: Text of the [CC-BY-SA] License and [Terms of Use] for additional terms which may apply. The original article is available at: http://en.wikipedia.org/wiki/<title>
View
@@ -43,6 +43,7 @@
GUTENBERG, en, enguten,
OTHERS, en, enhow,
OTHERS, en, enhitch,
+ OTHERS, en, enkjv,
, en, enpedia,
QUOTE, en, enquote,
OTHERS, en, enstarw,
View
@@ -0,0 +1,109 @@
+#!/bin/sh
+# aside from this initial boilerplate, this is actually -*- scheme -*- code
+main='(module-ref (resolve-module '\''(scripts script-entry-point)) '\'main')'
+exec ${GUILE-guile} --no-auto-compile -l $0 -c "(apply $main (command-line))" "$0" "$@"
+!#
+
+;;; nls-print - print the name of a language from its code
+;;;
+;;; Copyright (c) 2010 Openmoko Inc.
+;;;
+;;; Authors Christopher Hall <hsw@openmoko.com>
+;;;
+;;; This program is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+(define-module (scripts script-entry-point)
+ :use-module (ice-9 getopt-long)
+ :use-module (ice-9 rdelim)
+ :use-module (ice-9 regex)
+ :use-module (srfi srfi-1)
+ :export (script-entry-point))
+
+(define-macro (unless cond . body)
+ `(if (not ,cond) (begin ,@body)))
+
+(define *program* "program-name-here")
+(define *debug* #f)
+(define *verbose* #f)
+
+(define (usage message)
+ "Display a usage message"
+ (if (string? message)
+ (format #t "error: ~a~%" message))
+ (format #t "usage: ~a [--verbose] [--debug] <file>~%" *program*)
+ (exit 1))
+
+
+(define (main . args)
+ "Main program"
+ (set! *program* (second args))
+ (let* ((parsed-opts (getopt-long (drop args 1)
+ '((help (single-char #\h))
+ (verbose (single-char #\v))
+ (debug (single-char #\d)))))
+ (=h (option-ref parsed-opts 'help #f))
+ (*verbose* (option-ref parsed-opts 'verbose #f))
+ (*debug* (option-ref parsed-opts 'debug #f))
+ (inputs (drop (first parsed-opts) 1)))
+ (if =h (usage '()))
+ (if *debug*
+ (begin
+ (format #t "parsed opts = ~a~%" parsed-opts)
+ (format #t "non-option args = ~a~%" inputs)))
+
+ (unless (= 1 (length inputs)) (usage "exactly one code argument is required"))
+
+ (let ((title (make-regexp "<title>(Bible[^<]*/([^<]*))</title>"))
+ (numeric-prefix (make-regexp "^([[:digit:]]+|I+)[[:space:]]+(.*)$"))
+ (in (car inputs)))
+ (with-input-from-file in
+ (lambda ()
+ (while (not (eof-object? (peek-char)))
+ (let* ((line (string-trim-both (read-line)))
+ (match (regexp-exec title line)))
+ (if match
+ (let* ((article (get-sub-match match 1))
+ (redirect (get-sub-match match 2))
+ (prefixed (regexp-exec numeric-prefix redirect)))
+ (if *debug*
+ (format #t "redirect = ~a -> ~a~%" redirect article))
+ (create-redirect article redirect)
+ (if prefixed
+ (let ((number (get-sub-match prefixed 1))
+ (name (get-sub-match prefixed 2)))
+ (create-redirect article (string-join (list name number) " "))))
+
+ )))))))
+))
+
+
+(define (get-sub-match match i)
+ "extract the full title from a match"
+ (let ((text (array-ref match 0))
+ (ref (array-ref match (1+ i))))
+ (string-copy text (car ref) (cdr ref))))
+
+
+(define (create-redirect existing-title new-title)
+ "Create a MediaWiki redirect entry"
+ (format #t "
+ <page>
+ <title>~a</title>
+ <revision>
+ <text xml:space=\"preserve\">#REDIRECT [[~a]]
+</text>
+ </revision>
+ </page>
+" new-title existing-title))
View
@@ -57,6 +57,7 @@ exec sbcl --script "$0" "$0" "$@"
("dict" "Wiktionary")
("guten" "Gutenberg")
("how" "Wikihow")
+ ("kjv" "KingJamesBible")
("pedia" "Wikipedia")
("quote" "Wikiquote")
("starw" "Wookieepedia")
@@ -66,14 +67,21 @@ exec sbcl --script "$0" "$0" "$@"
do
(setf (gethash (first item) *type-codes*) (second item)))
-
(defun get-type (code)
"return type name from type code"
(multiple-value-bind (value exists)
(gethash code *type-codes*)
(and exists value)))
+(defvar *protocol* "http://" "HTTP or HTTPS")
+(defvar *domain* ".thewikireader.com/downloads/" "fixed part of the URL")
+
+(defun get-url (prefix file)
+ "create the full URL"
+ (concatenate 'string *protocol* prefix *domain* file))
+
+
(defvar *languages* (make-hash-table :test #'equal))
(defun load-languages (filename)
@@ -174,15 +182,24 @@ exec sbcl --script "$0" "$0" "$@"
(list size sum)))))))
+(defun split-path-string (string)
+ "Returns a list of substrings of string e.g. /a/b/c -> (\"a\" \"b\" \"c\")
+a trailing '/' will cause an empty item on the end of the list"
+ (loop for i = 0 then (1+ j)
+ as j = (position #\/ string :start i)
+ collect (subseq string i j)
+ while j))
+
(defun list-of-files (dir-name)
- "return a sorted list of all .7z and .7z.* files ((basename size MD5)...)"
+ "return a sorted list of all .7z and .7z.* files ((basename size MD5 prefix)...)"
(let ((7z (concatenate 'string dir-name "/*.7z"))
(arc (concatenate 'string dir-name "/*.7z.???")))
(loop for file-path in (sort
(append (directory 7z) (directory arc))
#'string< :key #'namestring)
when
(let ((basename (file-namestring file-path))
+ (prefix (first (last (split-path-string (directory-namestring file-path)) 2))) ; always has trailing '/' => 2 conses
(ignore-file-name (concatenate 'string (namestring file-path) ".IGNORE")))
(if (probe-file ignore-file-name)
(when *verbose*
@@ -191,13 +208,13 @@ exec sbcl --script "$0" "$0" "$@"
(progn
(when *verbose*
(format t " File: ~a~%" basename))
- (append (list basename) (md5sum-and-length file-path)))))
+ (append (list basename) (md5sum-and-length file-path) (list prefix)))))
collect it)))
(defun partition-files (dir file-list)
"put adjacent similar names into a nested list
- (folder date language type ( uncompressed-size (file1 length1 MD5-1) (file2 length2 MD5-2) ... ) )"
+ (folder date language type ( uncompressed-size (file1 length1 MD5-1 prefix1) (file2 length2 MD5-2 prefix2) ... ) )"
(let ((table (make-hash-table :test #'equal)))
(loop for item in file-list
do
@@ -244,14 +261,16 @@ exec sbcl --script "$0" "$0" "$@"
"create a file object"
(let* ((name (first file))
(size (second file))
- (md5 (third file)))
+ (md5 (third file))
+ (prefix (fourth file)))
(list
(cons 'name name)
+ (cons 'url (get-url prefix name))
(cons 'size size)
(cons 'md5 md5))))
-(defun output-structure (stream dir-name structure)
+(defun output-structure (stream structure)
"output the JSON structure"
(let ((bases (output-volumes structure :base t))
(volumes (output-volumes structure))
@@ -293,11 +312,13 @@ exec sbcl --script "$0" "$0" "$@"
(load-languages languages)
(with-open-file (out-stream index :direction :output :if-exists :supersede
:if-does-not-exist :create)
- (loop for dir in (sort directories #'string<)
- do
- (when *verbose*
- (format t "Directory: ~a~%" dir))
- (output-structure out-stream dir (partition-files dir (list-of-files dir))))))))
+ (let ((all-files (loop for dir in (sort directories #'string<)
+ append
+ (when (string= "" (file-namestring (probe-file dir)))
+ (when *verbose*
+ (format t "Directory: ~a~%" dir))
+ (partition-files dir (list-of-files dir))))))
+ (output-structure out-stream all-files))))))
;;; run the main program
View
@@ -284,8 +284,12 @@ do
arg="${arg#*:}"
truncate="${arg%%:*}"
- IsInteger "${truncate}" || truncate='unlimited'
- [ "${truncate}" -le 0 ] && truncate='unlimited'
+ if IsInteger "${truncate}"
+ then
+ [ "${truncate}" -le 0 ] && truncate='unlimited'
+ else
+ truncate='unlimited'
+ fi
# license and terms
licenses=$(readlink -m "${LicensesDirectory}")
Oops, something went wrong.

0 comments on commit 397d26d

Please sign in to comment.