Skip to content


Subversion checkout URL

You can clone with
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 306 lines (257 sloc) 11.4 KB
#| This is actually -*- lisp -*- code
# sbcl needs a little help to pass the script name to sb-ext:*posix-argv* code:
exec sbcl --script "$0" "$0" "$@"
;;; Generate an XML index file for all packages
;;; Copyright (c) 2010 Openmoko Inc.
;;; Authors Christopher Hall <>
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; GNU General Public License for more details.
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <>.
(load (merge-pathnames "quicklisp/setup.lisp"
(ql:quickload 'getopt)
(ql:quickload 'md5)
(ql:quickload 'cl-json)
(defvar *program* (second sb-ext:*posix-argv*) "name of this script file")
(defvar *debug* nil "set true for debug output")
(defvar *verbose* nil "set true for verbose output")
(defun usage (&rest message)
"Display a usage message"
(if message
(format t "error: ~a~%" message))
(format t "usage: ~a <options> <directories>~%" *program*)
(format t " --verbose more verbose output~%")
(format t " --debug debugging output~%")
(format t " --languages=<data-file> language data file (lisp)~%")
(format t " --index=<output-file> the output index file~%")
(sb-ext:quit :unix-status 1))
(defvar *type-codes* (make-hash-table :test #'equal))
(loop for item in
'(("appro" "Appropedia")
("books" "Wikibooks")
("dict" "Wiktionary")
("guten" "Gutenberg")
("how" "Wikihow")
("pedia" "Wikipedia")
("quote" "Wikiquote")
("starw" "Wookieepedia")
("fgrlm" "ForgottenRealms")
("dand" "DandD")
("trav" "Wikitravel"))
(setf (gethash (first item) *type-codes*) (second item)))
(defun get-type (code)
"return type name from type code"
(multiple-value-bind (value exists)
(gethash code *type-codes*)
(and exists value)))
(defvar *languages* (make-hash-table :test #'equal))
(defun load-languages (filename)
"load the *language* hash table from list of languages"
(loop for item in
(with-open-file (stream filename :direction :input)
(read stream))
(setf (gethash (first item) *languages*) (rest item))))
(defun get-language (code)
"return (english native) from language code"
(multiple-value-bind (value exists)
(gethash code *languages*)
(and exists value)))
(defun match-prefix-suffix (prefix suffix possible)
"extract the language and type identifier from a string"
(let ((lang (get-language prefix))
(type (get-type suffix)))
((and lang type)
(list (cons prefix lang) (list suffix type)))
((string= "" prefix)
(let* ((end-pos (1- (length prefix)))
(new-prefix (subseq prefix 0 end-pos))
(new-suffix (concatenate 'string (string (elt prefix end-pos)) suffix))
(new-possible (cond
(list (append (list prefix) lang) (make-list 2 :initial-element suffix)))
(list (make-list 3 :initial-element prefix) (list suffix type)))
(t possible))))
(match-prefix-suffix new-prefix new-suffix new-possible))))))
(defun split-prefix (item)
"extract the language and type identifier from a string, e.g.
(dedict 12345678 (de German Deutsch) (dict Wiktionary))"
(match-prefix-suffix item "" (list '("!" "None" "None") '("Invalid" "Invalid"))))
(defun identify (file-name)
"separate out language, type and date from file name: <lang><type>-<date>"
(let* ((p (search "-" file-name))
(prefix (subseq file-name 0 p))
(date (subseq file-name (1+ p))))
(if (string= "base" prefix)
(list "." date '("#" "all" "all") '("base" "base"))
(append (list prefix date) (split-prefix prefix)))))
(defun uncompressed-size (file-name)
"call 7z l file and extract the uncompressed size from the output"
(let* ((blanks '(#\Space #\NO-BREAK_SPACE #\Tab #\Vt #\Return #\Newline))
(rc 0)
(out (string-trim blanks
(with-output-to-string (st)
(setq rc (sb-ext:process-exit-code
(sb-ext:run-program "/usr/bin/7z"
(list "l" file-name)
:output st :error st))))))
(p (search (string #\Newline) out :from-end t))
(last-line (string-trim blanks (subseq out (1+ p)))))
(if (zerop rc)
(parse-integer last-line :start 0 :end (search " " last-line))
(defun vector-to-hex (byte-vector)
"convert vector of integers to hexadecimal string, values clipped to 00..ff"
(format nil "~(~{~2,'0X~}~)"
(map 'list #'(lambda (x) (logand #xff x)) byte-vector)))
(defun md5sum-and-length (file-path)
"return a list (length md5-sum) for the file, save the sum in <file-path>.MD5SUM"
(let ((sum-path (concatenate 'string (namestring file-path) ".MD5SUM")))
(when *debug*
(format t " File: ~a SUM: ~d~%" file-path sum-path))
(with-open-file (file-stream file-path :direction :input :element-type 'unsigned-byte)
(let ((size (file-length file-stream)))
(if (and (probe-file sum-path) (> (file-write-date sum-path) (file-write-date file-path)))
(when *debug*
(format t " Read cached sum from: ~a~%" sum-path))
(with-open-file (sum-stream sum-path :direction :input :element-type 'base-char)
(list size (read-line sum-stream))))
(let ((sum (vector-to-hex (md5:md5sum-stream file-stream))))
(with-open-file (out-stream sum-path :direction :output :if-exists :supersede
:if-does-not-exist :create)
(princ sum out-stream))
(list size sum)))))))
(defun list-of-files (dir-name)
"return a sorted list of all .7z and .7z.* files ((basename size MD5)...)"
(let ((7z (concatenate 'string dir-name "/*.7z"))
(arc (concatenate 'string dir-name "/*.7z.???")))
(loop for file-path in (sort
(append (directory 7z) (directory arc))
#'string< :key #'namestring)
(let ((basename (file-namestring file-path))
(ignore-file-name (concatenate 'string (namestring file-path) ".IGNORE")))
(if (probe-file ignore-file-name)
(when *verbose*
(format t " Skip: ~a~%" basename)
(when *verbose*
(format t " File: ~a~%" basename))
(append (list basename) (md5sum-and-length file-path)))))
collect it)))
(defun partition-files (dir file-list)
"put adjacent similar names into a nested list
(folder date language type ( uncompressed-size (file1 length1 MD5-1) (file2 length2 MD5-2) ... ) )"
(let ((table (make-hash-table :test #'equal)))
(loop for item in file-list
(let* ((file-name (first item))
(key (subseq file-name 0 (search ".7z" file-name))))
(setf (gethash key table)
(if (gethash key table nil)
(append (gethash key table) (list item))
(uncompressed-size (concatenate 'string dir "/" file-name))
(loop for k being the hash-keys in table using (hash-value v)
collect (append (identify k) (list v)))))
(defun output-volumes (structure &key base)
"output the volume list"
for item in structure
when (eq base (string= (first item) "."))
(let* ((lang (third item))
(id (first lang))
(english (second lang))
(native (third lang))
(name (second (fourth item)))
(date (second item))
(folder (first item))
(a (fifth item))
(uncompressed-size (first a))
(archives (rest a)))
(cons 'id id)
(cons 'english english)
(cons 'native native)
(cons 'name name)
(cons 'date date)
(cons 'folder folder)
(cons 'size uncompressed-size)
(cons 'files (map 'list #'make-file-item archives))))))
(defun make-file-item (file)
"create a file object"
(let* ((name (first file))
(size (second file))
(md5 (third file)))
(cons 'name name)
(cons 'size size)
(cons 'md5 md5))))
(defun output-structure (stream dir-name structure)
"output the JSON structure"
(let ((bases (output-volumes structure :base t))
(volumes (output-volumes structure))
(table (make-hash-table :test #'equal)))
(setf (gethash 'bases table) bases)
(setf (gethash 'volumes table) volumes)
(json:encode-json table stream)))
(defun main (args)
"Main program"
(setq *program* (first args))
(multiple-value-bind (args opts errors) (getopt:getopt (rest args)
'(("index" :required)
("languages" :required)
("help" :none t)
("verbose" :none t)
("debug" :none t)))
(when errors
(usage "invalid option: " (first errors)))
(setq *verbose* (rest (assoc "verbose" opts :test 'string=)))
(setq *debug* (rest (assoc "debug" opts :test 'string=)))
(when (rest (assoc "help" opts :test 'string=))
(let ((index (rest (assoc "index" opts :test 'string=)))
(languages (rest (assoc "languages" opts :test 'string=)))
(directories args))
(when *debug*
(format t "parsed opts = ~a~%" opts)
(format t "non-option args = ~a~%" args)
(format t "output file = ~a~%" index))
(unless index (usage "Missing index"))
(unless languages (usage "Missing languages file"))
(unless (probe-file languages) (usage "Languages file does not exist"))
(when (= 0 (length directories)) (usage "missing arguments"))
(load-languages languages)
(with-open-file (out-stream index :direction :output :if-exists :supersede
:if-does-not-exist :create)
(loop for dir in (sort directories #'string<)
(when *verbose*
(format t "Directory: ~a~%" dir))
(output-structure out-stream dir (partition-files dir (list-of-files dir))))))))
;;; run the main program
;;; from exec at top of file sb-ext:*posix-argv* = ("sbcl" "script-file" "arg1"...)
(main (rest sb-ext:*posix-argv*))
Jump to Line
Something went wrong with that request. Please try again.