Skip to content
Fetching contributors…
Cannot retrieve contributors at this time
executable file 136 lines (113 sloc) 2.53 KB
#!/bin/sh
# Locate the latest dump file for each requested wiki and download it,
# resuming the transfer if a partial download is already present.
# Report a fatal error and terminate the script.
# Arguments: $* - the words of the message; they are joined by the first
#                 character of IFS (a space by default)
# Outputs:   one line "error: <message>" on standard output
# Exits:     always with status 1; this function never returns
ERROR()
{
  # "$*" is quoted so the message is printed verbatim: without the quotes
  # the text would be word-split and any glob characters in it expanded.
  printf 'error: %s\n' "$*"
  exit 1
}
# Print an optional error line followed by the usage summary, then exit.
# Arguments: $* - optional error text; the error line is printed only when
#                 $1 is non-empty
# Globals:   RootURL, ext, rate (read) - shown in brackets as current values
# Outputs:   the optional "error: ..." line and the option summary on
#            standard output
# Exits:     always with status 1; this function never returns
USAGE()
{
  [ -z "$1" ] || printf 'error: %s\n' "$*"
  # Every expansion is passed to printf as a quoted %s argument so that the
  # program name and the displayed values are shown exactly as they are
  # (no word splitting, no glob expansion).
  printf 'usage: %s %s\n' "$(basename "$0")" '<options> [<language-code>{:<type>}...]'
  printf '%s\n' ' --help -h this message'
  printf '%s\n' ' --verbose -v more messages'
  printf ' --url=<url> -u <url> base URL for downloads [%s]\n' "${RootURL}"
  printf '%s\n' ' --download=<dir> -d <dir> download directory [No Download]'
  printf ' --ext=<text> -x <text> file name extension [%s]\n' "${ext}"
  printf ' --rate=<rate> -r <rate> download rate limit [%s]\n' "${rate}"
  exit 1
}
# wiki-names.sh (located next to this script) is expected to define:
# ListOfAllContentTypes, TypeToName, TypeToFilename
. "$(dirname "$0")/wiki-names.sh"

# main program

# defaults; each can be overridden by the matching command-line option
verbose=no
debug=no
RootURL=http://download.wikipedia.org
ext=pages-articles.xml.bz2
download=
rate=50k

# use /usr/local/bin/getopt when it is executable, otherwise whatever
# "getopt" resolves to on PATH
getopt=/usr/local/bin/getopt
[ -x "${getopt}" ] || getopt=getopt

# Option specifications; a trailing ':' marks an option that takes a value.
# "url:" carries the colon so that --url=<url> accepts its argument in the
# same way as the short form -u <url>.
short_opts=hvu:d:x:r:D
long_opts=help,verbose,url:,download:,ext:,rate:,debug

args=$("${getopt}" -o "${short_opts}" --long="${long_opts}" -- "$@") || exit 1

# replace the arguments with the parsed values
eval set -- "${args}"
# Consume the parsed options from the positional parameters one at a time.
# The loop ends at the "--" marker, leaving only the <language-code>{:<type>}
# operands in "$@". USAGE prints its message and exits, so the -h and "*"
# arms never fall through.
while :
do
  case "$1" in
    -v|--verbose)
      verbose=yes
      shift
      ;;

    -u|--url)
      RootURL=$2
      shift 2
      ;;

    -d|--download)
      download=$2
      # the message is passed as one quoted word so that a value containing
      # spaces or glob characters is reported exactly as it was given
      [ -d "${download}" ] || USAGE "${download} is not a directory"
      shift 2
      ;;

    -x|--ext)
      ext=$2
      shift 2
      ;;

    -r|--rate)
      rate=$2
      shift 2
      ;;

    -D|--debug)
      debug=yes
      shift
      ;;

    --)
      shift
      break
      ;;

    -h|--help)
      USAGE
      ;;

    *)
      USAGE "invalid option: $1"
      ;;
  esac
done

# at least one <language-code> operand is required
if [ $# -lt 1 ]
then
  USAGE missing arguments
fi
# Print the last dump directory stamp found in a lynx listing.
# Input:   stdin - text as produced by "lynx -dump" for a wiki's dump index
# Output:  for lines containing "[<n>]<digit>", the digits that follow the
#          last "]<digits>" occurrence in the first field; the value from
#          the last such line is printed, or nothing when no line qualifies
# Only the two-argument form of match() is used, so this does not depend on
# the gawk-only array argument.
LatestDumpStamp()
{
  awk '
    /\[[0-9]+\][0-9]/ {
      rest = $1
      while (match(rest, /\][0-9]+/)) {
        dir = substr(rest, RSTART + 1, RLENGTH - 1)
        rest = substr(rest, RSTART + RLENGTH)
      }
    }
    END {
      if (dir != "") {
        print dir
      }
    }'
}

# process each lang{:type} argument
for arg in "$@"
do
  # pad with colons so that both fields can be split off even when the
  # ":<type>" part was omitted
  arg="${arg}:::::::::::"
  lang="${arg%%:*}"
  arg="${arg#*:}"
  typename="${arg%%:*}"
  [ -n "${typename}" ] || typename=pedia

  # the wiki name is the part of the generated file name before the first '-'
  file="${lang}$(TypeToFilename "${typename}")"
  wikiname="${file%%-*}"
  url="${RootURL}/${wikiname}"

  stamp=$(lynx -dump "${url}" | LatestDumpStamp)
  # without a stamp the URL built below would be meaningless, so stop here
  [ -n "${stamp}" ] || ERROR "no dump directory found at ${url}"

  file="${wikiname}-${stamp}-${ext}"
  remote="${RootURL}/${wikiname}/${stamp}/${file}"
  printf 'Download: %s\n' "${remote}"

  # only fetch when a download directory was requested
  if [ -n "${download}" ]
  then
    target="${download}/${file}"
    printf ' To: %s\n' "${target}"
    # --continue resumes a partially downloaded file
    wget --no-directories --continue --limit-rate="${rate}" --output-document="${target}" "${remote}"
  fi
done
Something went wrong with that request. Please try again.