Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 327 lines (275 sloc) 12.4 KB
#!/bin/sh
#| This is actually -*- lisp -*- code
# sbcl needs a little help to pass the script name to sb-ext:*posix-argv* code:
exec sbcl --script "$0" "$0" "$@"
|#
;;; Generate an XML index file for all packages
;;;
;;; Copyright (c) 2010 Openmoko Inc.
;;;
;;; Authors Christopher Hall <hsw@openmoko.com>
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
(load (merge-pathnames "quicklisp/setup.lisp"
(user-homedir-pathname)))
(ql:quickload 'getopt)
(ql:quickload 'md5)
(ql:quickload 'cl-json)
(defvar *program* (second sb-ext:*posix-argv*) "name of this script file")
(defvar *debug* nil "set true for debug output")
(defvar *verbose* nil "set true for verbose output")
(defun usage (&rest message)
"Display a usage message"
(if message
(format t "error: ~a~%" message))
(format t "usage: ~a <options> <directories>~%" *program*)
(format t " --verbose more verbose output~%")
(format t " --debug debugging output~%")
(format t " --languages=<data-file> language data file (lisp)~%")
(format t " --index=<output-file> the output index file~%")
(sb-ext:quit :unix-status 1))
(defvar *type-codes* (make-hash-table :test #'equal))
(loop for item in
'(("appro" "Appropedia")
("books" "Wikibooks")
("dict" "Wiktionary")
("guten" "Gutenberg")
("how" "Wikihow")
("kjv" "KingJamesBible")
("pedia" "Wikipedia")
("quote" "Wikiquote")
("starw" "Wookieepedia")
("fgrlm" "ForgottenRealms")
("dand" "DandD")
("trav" "Wikitravel"))
do
(setf (gethash (first item) *type-codes*) (second item)))
(defun get-type (code)
"return type name from type code"
(multiple-value-bind (value exists)
(gethash code *type-codes*)
(and exists value)))
(defvar *protocol* "http://" "HTTP or HTTPS")
(defvar *domain* ".thewikireader.com/downloads/" "fixed part of the URL")
(defun get-url (prefix file)
"create the full URL"
(concatenate 'string *protocol* prefix *domain* file))
(defvar *languages* (make-hash-table :test #'equal))
(defun load-languages (filename)
"load the *language* hash table from list of languages"
(loop for item in
(with-open-file (stream filename :direction :input)
(read stream))
do
(setf (gethash (first item) *languages*) (rest item))))
(defun get-language (code)
"return (english native) from language code"
(multiple-value-bind (value exists)
(gethash code *languages*)
(and exists value)))
(defun match-prefix-suffix (prefix suffix possible)
"extract the language and type identifier from a string"
(let ((lang (get-language prefix))
(type (get-type suffix)))
(cond
((and lang type)
(list (cons prefix lang) (list suffix type)))
((string= "" prefix)
possible)
(t
(let* ((end-pos (1- (length prefix)))
(new-prefix (subseq prefix 0 end-pos))
(new-suffix (concatenate 'string (string (elt prefix end-pos)) suffix))
(new-possible (cond
(lang
(list (append (list prefix) lang) (make-list 2 :initial-element suffix)))
(type
(list (make-list 3 :initial-element prefix) (list suffix type)))
(t possible))))
(match-prefix-suffix new-prefix new-suffix new-possible))))))
(defun split-prefix (item)
"extract the language and type identifier from a string, e.g.
(dedict 12345678 (de German Deutsch) (dict Wiktionary))"
(match-prefix-suffix item "" (list '("!" "None" "None") '("Invalid" "Invalid"))))
(defun identify (file-name)
"separate out language, type and date from file name: <lang><type>-<date>"
(let* ((p (search "-" file-name))
(prefix (subseq file-name 0 p))
(date (subseq file-name (1+ p))))
(if (string= "base" prefix)
(list "." date '("#" "all" "all") '("base" "base"))
(append (list prefix date) (split-prefix prefix)))))
(defun uncompressed-size (file-name)
"call 7z l file and extract the uncompressed size from the output"
(let* ((blanks '(#\Space #\NO-BREAK_SPACE #\Tab #\Vt #\Return #\Newline))
(rc 0)
(out (string-trim blanks
(with-output-to-string (st)
(setq rc (sb-ext:process-exit-code
(sb-ext:run-program "/usr/bin/7z"
(list "l" file-name)
:output st :error st))))))
(p (search (string #\Newline) out :from-end t))
(last-line (string-trim blanks (subseq out (1+ p)))))
(if (zerop rc)
(parse-integer last-line :start 0 :end (search " " last-line))
0)))
(defun vector-to-hex (byte-vector)
"convert vector of integers to hexadecimal string, values clipped to 00..ff"
(format nil "~(~{~2,'0X~}~)"
(map 'list #'(lambda (x) (logand #xff x)) byte-vector)))
(defun md5sum-and-length (file-path)
"return a list (length md5-sum) for the file, save the sum in <file-path>.MD5SUM"
(let ((sum-path (concatenate 'string (namestring file-path) ".MD5SUM")))
(when *debug*
(format t " File: ~a SUM: ~d~%" file-path sum-path))
(with-open-file (file-stream file-path :direction :input :element-type 'unsigned-byte)
(let ((size (file-length file-stream)))
(if (and (probe-file sum-path) (> (file-write-date sum-path) (file-write-date file-path)))
(progn
(when *debug*
(format t " Read cached sum from: ~a~%" sum-path))
(with-open-file (sum-stream sum-path :direction :input :element-type 'base-char)
(list size (read-line sum-stream))))
(let ((sum (vector-to-hex (md5:md5sum-stream file-stream))))
(with-open-file (out-stream sum-path :direction :output :if-exists :supersede
:if-does-not-exist :create)
(princ sum out-stream))
(list size sum)))))))
(defun split-path-string (string)
"Returns a list of substrings of string e.g. /a/b/c -> (\"a\" \"b\" \"c\")
a trailing '/' will cause an empty item on the end of the list"
(loop for i = 0 then (1+ j)
as j = (position #\/ string :start i)
collect (subseq string i j)
while j))
(defun list-of-files (dir-name)
"return a sorted list of all .7z and .7z.* files ((basename size MD5 prefix)...)"
(let ((7z (concatenate 'string dir-name "/*.7z"))
(arc (concatenate 'string dir-name "/*.7z.???")))
(loop for file-path in (sort
(append (directory 7z) (directory arc))
#'string< :key #'namestring)
when
(let ((basename (file-namestring file-path))
(prefix (first (last (split-path-string (directory-namestring file-path)) 2))) ; always has trailing '/' => 2 conses
(ignore-file-name (concatenate 'string (namestring file-path) ".IGNORE")))
(if (probe-file ignore-file-name)
(when *verbose*
(format t " Skip: ~a~%" basename)
nil)
(progn
(when *verbose*
(format t " File: ~a~%" basename))
(append (list basename) (md5sum-and-length file-path) (list prefix)))))
collect it)))
(defun partition-files (dir file-list)
"put adjacent similar names into a nested list
(folder date language type ( uncompressed-size (file1 length1 MD5-1 prefix1) (file2 length2 MD5-2 prefix2) ... ) )"
(let ((table (make-hash-table :test #'equal)))
(loop for item in file-list
do
(let* ((file-name (first item))
(key (subseq file-name 0 (search ".7z" file-name))))
(setf (gethash key table)
(if (gethash key table nil)
(append (gethash key table) (list item))
(list
(uncompressed-size (concatenate 'string dir "/" file-name))
item)))))
(loop for k being the hash-keys in table using (hash-value v)
collect (append (identify k) (list v)))))
(defun output-volumes (structure &key base)
"output the volume list"
(loop
for item in structure
when (eq base (string= (first item) "."))
collect
(let* ((lang (third item))
(id (first lang))
(english (second lang))
(native (third lang))
(name (second (fourth item)))
(date (second item))
(folder (first item))
(a (fifth item))
(uncompressed-size (first a))
(archives (rest a)))
(list
(cons 'id id)
(cons 'english english)
(cons 'native native)
(cons 'name name)
(cons 'date date)
(cons 'folder folder)
(cons 'size uncompressed-size)
(cons 'files (map 'list #'make-file-item archives))))))
(defun make-file-item (file)
"create a file object"
(let* ((name (first file))
(size (second file))
(md5 (third file))
(prefix (fourth file)))
(list
(cons 'name name)
(cons 'url (get-url prefix name))
(cons 'size size)
(cons 'md5 md5))))
(defun output-structure (stream structure)
"output the JSON structure"
(let ((bases (output-volumes structure :base t))
(volumes (output-volumes structure))
(table (make-hash-table :test #'equal)))
(setf (gethash 'bases table) bases)
(setf (gethash 'volumes table) volumes)
(json:encode-json table stream)))
(defun main (args)
"Main program"
(setq *program* (first args))
(multiple-value-bind (args opts errors) (getopt:getopt (rest args)
'(("index" :required)
("languages" :required)
("help" :none t)
("verbose" :none t)
("debug" :none t)))
(when errors
(usage "invalid option: " (first errors)))
(setq *verbose* (rest (assoc "verbose" opts :test 'string=)))
(setq *debug* (rest (assoc "debug" opts :test 'string=)))
(when (rest (assoc "help" opts :test 'string=))
(usage))
(let ((index (rest (assoc "index" opts :test 'string=)))
(languages (rest (assoc "languages" opts :test 'string=)))
(directories args))
(when *debug*
(format t "parsed opts = ~a~%" opts)
(format t "non-option args = ~a~%" args)
(format t "output file = ~a~%" index))
(unless index (usage "Missing index"))
(unless languages (usage "Missing languages file"))
(unless (probe-file languages) (usage "Languages file does not exist"))
(when (= 0 (length directories)) (usage "missing arguments"))
(load-languages languages)
(with-open-file (out-stream index :direction :output :if-exists :supersede
:if-does-not-exist :create)
(let ((all-files (loop for dir in (sort directories #'string<)
append
(when (string= "" (file-namestring (probe-file dir)))
(when *verbose*
(format t "Directory: ~a~%" dir))
(partition-files dir (list-of-files dir))))))
(output-structure out-stream all-files))))))
;;; run the main program
;;; from exec at top of file sb-ext:*posix-argv* = ("sbcl" "script-file" "arg1"...)
(main (rest sb-ext:*posix-argv*))