Skip to content


Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 353 lines (294 sloc) 13.2 kb
#| This is actually -*- lisp -*- code
# sbcl needs a little help to pass the script name to sb-ext:*posix-argv* code:
exec sbcl --script "$0" "$0" "$@"
;;; Generate an XML index file for all packages
;;; Copyright (c) 2010 Openmoko Inc.
;;; Authors Christopher Hall <>
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; GNU General Public License for more details.
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <>.
(require 'asdf)
(require 'getopt)
(require 'md5)
(defvar *program* (second sb-ext:*posix-argv*) "name of this script file")
(defvar *debug* nil "set true for debug output")
(defvar *verbose* nil "set true for verbose output")
(defun usage (&rest message)
"Display a usage message"
(if message
(format t "error: ~a~%" message))
(format t "usage: ~a <options> <directories>~%" *program*)
(format t " --verbose more verbose output~%")
(format t " --debug debugging output~%")
(format t " --languages=<data-file> language data file (lisp)~%")
(format t " --index=<output-file> the output index file~%")
(sb-ext:quit :unix-status 1))
(defvar *type-codes* (make-hash-table :test #'equal))
(loop for item in
'(("appro" "Appropedia")
("books" "Wikibooks")
("dict" "Wiktionary")
("guten" "Gutenberg")
("how" "Wikihow")
("pedia" "Wikipedia")
("quote" "Wikiquote")
("starw" "Wookieepedia")
("fgrlm" "ForgottenRealms")
("dand" "DandD")
("trav" "Wikitravel"))
(setf (gethash (first item) *type-codes*) (second item)))
(defun get-type (code)
"return type name from type code"
(multiple-value-bind (value exists)
(gethash code *type-codes*)
(and exists value)))
(defvar *languages* (make-hash-table :test #'equal))
(defun load-languages (filename)
"load the *language* hash table from list of languages"
(loop for item in
(with-open-file (stream filename :direction :input)
(read stream))
(setf (gethash (first item) *languages*) (rest item))))
(defun get-language (code)
"return (english native) from language code"
(multiple-value-bind (value exists)
(gethash code *languages*)
(and exists value)))
(defun match-prefix-suffix (prefix suffix possible)
"extract the language and type identifier from a string"
(let ((lang (get-language prefix))
(type (get-type suffix)))
((and lang type)
(list (cons prefix lang) (list suffix type)))
((string= "" prefix)
(let* ((end-pos (1- (length prefix)))
(new-prefix (subseq prefix 0 end-pos))
(new-suffix (concatenate 'string (string (elt prefix end-pos)) suffix))
(new-possible (cond
(list (append (list prefix) lang) (make-list 2 :initial-element suffix)))
(list (make-list 3 :initial-element prefix) (list suffix type)))
(t possible))))
(match-prefix-suffix new-prefix new-suffix new-possible))))))
(defun split-prefix (item)
"extract the language and type identifier from a string, e.g.
(dedict 12345678 (de German Deutsch) (dict Wiktionary))"
(match-prefix-suffix item "" (list '("!" "None" "None") '("Invalid" "Invalid"))))
(defun identify (file-name)
"separate out language, type and date from file name: <lang><type>-<date>"
(let* ((p (search "-" file-name))
(prefix (subseq file-name 0 p))
(date (subseq file-name (1+ p))))
(if (string= "base" prefix)
(list "." date '("#" "all" "all") '("base" "base"))
(append (list prefix date) (split-prefix prefix)))))
(defun uncompressed-size (file-name)
"call 7z l file and extract the uncompressed size from the output"
(let* ((blanks '(#\Space #\NO-BREAK_SPACE #\Tab #\Vt #\Return #\Newline))
(rc 0)
(out (string-trim blanks
(with-output-to-string (st)
(setq rc (sb-ext:process-exit-code
(sb-ext:run-program "/usr/bin/7z"
(list "l" file-name)
:output st :error st))))))
(p (search (string #\Newline) out :from-end t))
(last-line (string-trim blanks (subseq out (1+ p)))))
(if (zerop rc)
(parse-integer last-line :start 0 :end (search " " last-line))
(defun vector-to-hex (byte-vector)
"convert vector of integers to hexadecimal string, values clipped to 00..ff"
(format nil "~(~{~2,'0X~}~)"
(map 'list #'(lambda (x) (logand #xff x)) byte-vector)))
(defun md5sum-and-length (file-path)
"return a list (length md5-sum) for the file, save the sum in <file-path>.MD5SUM"
(let ((sum-path (concatenate 'string (namestring file-path) ".MD5SUM")))
(when *debug*
(format t " File: ~a SUM: ~d~%" file-path sum-path))
(with-open-file (file-stream file-path :direction :input :element-type 'unsigned-byte)
(let ((size (file-length file-stream)))
(if (and (probe-file sum-path) (> (file-write-date sum-path) (file-write-date file-path)))
(when *debug*
(format t " Read cached sum from: ~a~%" sum-path))
(with-open-file (sum-stream sum-path :direction :input :element-type 'base-char)
(list size (read-line sum-stream))))
(let ((sum (vector-to-hex (md5:md5sum-stream file-stream))))
(with-open-file (out-stream sum-path :direction :output :if-exists :supersede
:if-does-not-exist :create)
(princ sum out-stream))
(list size sum)))))))
(defun list-of-files (dir-name)
"return a sorted list of all .7z and .7z.* files ((basename size MD5)...)"
(let ((7z (concatenate 'string dir-name "/*.7z"))
(arc (concatenate 'string dir-name "/*.7z.???")))
(loop for file-path in (sort
(append (directory 7z) (directory arc))
#'string< :key #'namestring)
(let ((basename (file-namestring file-path)))
(when *verbose*
(format t " File: ~a~%" basename))
(append (list basename) (md5sum-and-length file-path))))))
(defun partition-files (dir file-list)
"put adjacent similar names into a nested list
(folder date language type ( uncompressed-size (file1 length1 MD5-1) (file2 length2 MD5-2) ... ) )"
(let ((table (make-hash-table :test #'equal)))
(loop for item in file-list
(let* ((file-name (first item))
(key (subseq file-name 0 (search ".7z" file-name))))
(setf (gethash key table)
(if (gethash key table nil)
(append (gethash key table) (list item))
(uncompressed-size (concatenate 'string dir "/" file-name))
(loop for k being the hash-keys in table using (hash-value v)
collect (append (identify k) (list v)))))
(defvar *xml-indent* 0 "for indenting the XML output")
(defvar *xml-stack* nil "for xml-close to match the tags")
(defvar *xml-stream* t "current xml output stream")
(defun xml-set-stream (out-stream)
"set the xml output stream"
(if out-stream
(setq *xml-stream* out-stream)
(setq *xml-stream* t))) ;; sets output to stdout
(defun xml-current-indent ()
"a number of spaces corresponding to *xml-indent*"
(make-string *xml-indent* :initial-element #\Space))
(defun xml-string-of (item)
"return string of item, but downcase symbols"
(if (symbolp item)
(string-downcase item)
(format nil "~a" item)))
(defun xml-attribute-string (attributes)
"convert: ((key value)...) to string form: key=''value''..."
(loop for attr in attributes
append (list (format nil " ~a=\"~a\""
(xml-string-of (first attr))
(xml-string-of (second attr))))
into attr-list
(return (apply #'concatenate 'string attr-list))))
(defun xml-entry (tag data &rest attributes)
"display a single line XML entry"
(format *xml-stream* "~a<~a~a>~a</~a>~%" (xml-current-indent)
(xml-string-of tag)
(xml-attribute-string attributes)
(xml-string-of tag)))
(defun xml-open (tag &rest attributes)
"start the XML tag"
(format *xml-stream* "~a<~a~a>~%" (xml-current-indent) (xml-string-of tag) (xml-attribute-string attributes))
(setq *xml-indent* (+ *xml-indent* 2))
(push (xml-string-of tag) *xml-stack*))
(defun xml-close ()
"close the most recently opened tag"
(when *xml-stack*
(setq *xml-indent* (- *xml-indent* 2))
(format *xml-stream* "~a</~a>~%" (xml-current-indent) (pop *xml-stack*))))
(defun output-archive (file-list uncompressed-size)
"output xml for a single archive ((file size MD5)...)"
(xml-open 'archives (list 'count (length file-list)) (list 'size uncompressed-size))
for count = 1 then (1+ count)
for item in file-list
(xml-entry 'file (concatenate 'string (first item)"?torrent")
(list 'id count) (list 'size (second item)) (list 'md5 (third item))))
(defun output-volumes (structure &key base)
"output the volume list"
for item in structure
with count = 0
when (eq base (string= (first item) "."))
(incf count)
(xml-open 'volume (list 'id count))
(let* ((lang (third item))
(id (first lang))
(english (second lang))
(native (third lang)))
(xml-entry 'language native (list 'id id) (list 'name english)))
(xml-entry 'name (second (fourth item)))
(xml-entry 'date (second item))
(xml-entry 'folder (first item))
(let* ((a (fifth item))
(uncompressed-size (first a))
(archives (rest a)))
(output-archive archives uncompressed-size))
(defun output-structure (dir-name structure)
"output the XML structure"
(xml-open 'release)
(xml-entry 'version (file-namestring dir-name))
(xml-open 'compatibility '(select single))
(output-volumes structure :base t)
(xml-open 'documents '(select multiple))
(output-volumes structure)
(defun main (args)
"Main program"
(setq *program* (first args))
(multiple-value-bind (args opts errors) (getopt:getopt (rest args)
'(("index" :required)
("languages" :required)
("help" :none t)
("verbose" :none t)
("debug" :none t)))
(when errors
(usage "invalid option: " (first errors)))
(setq *verbose* (rest (assoc "verbose" opts :test 'string=)))
(setq *debug* (rest (assoc "debug" opts :test 'string=)))
(when (rest (assoc "help" opts :test 'string=))
(let ((index (rest (assoc "index" opts :test 'string=)))
(languages (rest (assoc "languages" opts :test 'string=)))
(directories args))
(when *debug*
(format t "parsed opts = ~a~%" opts)
(format t "non-option args = ~a~%" args)
(format t "output file = ~a~%" index))
(unless index (usage "Missing index"))
(unless languages (usage "Missing languages file"))
(unless (probe-file languages) (usage "Languages file does not exist"))
(when (= 0 (length directories)) (usage "missing arguments"))
(load-languages languages)
(with-open-file (out-stream index :direction :output :if-exists :supersede
:if-does-not-exist :create)
(xml-set-stream out-stream)
;;(xml-set-stream t)
(loop for dir in (sort directories #'string<)
(when *verbose*
(format t "Directory: ~a~%" dir))
(output-structure dir (partition-files dir (list-of-files dir))))
(xml-set-stream nil)))))
;;; run the main program
;;; from exec at top of file sb-ext:*posix-argv* = ("sbcl" "script-file" "arg1"...)
(main (rest sb-ext:*posix-argv*))
Jump to Line
Something went wrong with that request. Please try again.