Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 398 lines (325 sloc) 10.77 kB
#!/bin/sh
# Script to process multiple items on one rendering server
LicensesDirectory=XML-Licenses
# Print an error message and terminate the script.
# Arguments: the words of the message (joined with single spaces)
# Outputs:   "error: <message>" on stdout
# Exit:      always exits the script with status 1
ERROR()
{
  # "$*" is quoted so the message is printed verbatim: no glob expansion
  # and no collapsing of embedded whitespace
  printf '%s\n' "error: $*"
  exit 1
}
# Print an optional error message followed by the help text, then exit.
# Arguments: optional error message words; nothing is printed for the error
#            line when the first argument is empty
# Globals:   reads default_articles_per_block, default_block_size,
#            machine_count, parallel_build, work, dest and temp to show the
#            current defaults
# Outputs:   help text on stdout
# Exit:      always exits the script with status 1 (also for --help)
USAGE()
{
  [ -z "$1" ] || echo error: "$*"
  # every expansion below is quoted so that directory names and other values
  # are shown exactly as set (no word splitting, no glob expansion)
  echo usage: "$(basename "$0")" '<options> [<args>...]'
  echo ' arg <language>:<dir_suffix>:<file_prefix>:<links>:<count>:<size>:<parallel>:<truncate>'
  echo ' --help -h this message'
  echo ' --verbose -v more messages'
  echo ' --articles -a <n> articles per block ['"${default_articles_per_block}"']'
  echo ' --block-size -b <n>[k] max size of block ['"${default_block_size}"']'
  echo ' --index-only -i only build the index'
  echo ' --no-run -n do not run final make'
  echo ' --clear -c clear work and dest dirs - so everything will be built'
  echo ' --re-parse -R clear parse and render stamps - to reuse existion index'
  echo ' --re-render -r clear render stamps - for re-rendering with diffferent compression'
  echo ' --machines=<m> -m <m> set machine count ['"${machine_count}"']'
  echo ' --parallel=<n> -j <n> set make -j value ['"${parallel_build}"']'
  echo ' --work=<dir> -w <dir> workdir ['"${work}"']'
  echo ' --dest=<dir> -d <dir> destdir ['"${dest}"']'
  echo ' --temp=<dir> -t <dir> tempdir ['"${temp}"']'
  echo ' --farm=<f> -f <f> manual farm number [from hostname numeric suffix]'
  echo ' --debug -D only display make operations, do not execute them'
  echo ' Arguments: defaults: en:pedia:wiki:YES:'"${default_articles_per_block}"':'"${default_block_size}"':<n>:unlimited'
  echo ' links = [YES|NO] to enable inclusion of language links'
  echo ' count:size = max articles/max bytes to compress into one block'
  echo ' parallel = overrides the global --parallel=<n> for this item'
  echo ' negative value reduces from global value'
  echo ' truncate = truncated article byte-code that exceeds this'
  echo 'examples:'
  echo '1. index, parse, render japedia run with 36 threads followed by enpedia with 3 * (12 - 2) == 30 threads'
  echo ' enpedia would have no language links, high compression and log articles truncated to 30,000 bytes'
  echo ' Run --parallel=12 --machines=3 --clear --re-parse ja en:::NO:255:350000:-2:30000'
  echo '2. re-render enpedia using the existing index/parse data with low compression, language links and full articles'
  echo ' (30 threads must be used since the there will already be 30 parse data files)'
  echo ' Run --parallel=12 --machines=3 --re-render en::::::-2'
  exit 1
}
# ListOfAllContentTypes, TypeToName, TypeToFilename
. "$(dirname "$0")/wiki-names.sh"
# run a command or just print it
# Usage:   RUN [-n|--no-log] <command> [<args>...]
# Globals: debug   - when y/yes the command is only displayed, not executed
#          LogFile - file that receives a copy of the output when logging
# The arguments are joined into one string and executed with eval, so any
# shell quoting embedded in the arguments (e.g. VAR='a b') is honoured.
# When logging, the command is timed and its stdout/stderr are copied to
# ${LogFile} through tee; the status returned is then that of the final
# tee in the pipeline, not that of the command itself.
RUN()
{
  local command log
  log=yes
  case "$1" in
    -n|--no-log)
      log=no
      shift
      ;;
    *)
      ;;
  esac
  command="$*"
  case "${debug}" in
    [yY]|[yY][eE][sS])
      # quoted so the command is displayed exactly as it would be eval'ed
      # (no glob expansion, no whitespace collapsing)
      printf '%s\n' "DEBUG: ${command}"
      return
      ;;
    *)
      ;;
  esac
  case "${log}" in
    [yY]|[yY][eE][sS])
      echo | tee -a "${LogFile}"
      printf '%s\n' "===> $(date '+%Y-%m-%d %H:%M:%S'): ${command}" | tee -a "${LogFile}"
      eval "time ${command}" 2>&1 | tee -a "${LogFile}"
      ;;
    *)
      eval "${command}"
      ;;
  esac
}
# test if value is a number (i.e. non-empty and only contains [0..9])
# Arguments: $1 - the string to check
# Returns:   0 if $1 consists solely of decimal digits, 1 otherwise
IsNumber()
{
  local candidate="$1"
  # an empty string is not a number
  if [ -z "${candidate}" ]
  then
    return 1
  fi
  # any character outside 0..9 disqualifies the value
  case "${candidate}" in
    *[!0-9]*)
      return 1
      ;;
  esac
  return 0
}
# test if value is an integer
# Arguments: $1 - the string to check
# Returns:   the status of IsNumber applied to $1 with one leading '-'
#            (if present) removed
IsInteger()
{
  local magnitude
  magnitude="${1#-}"
  # the status of the last command is the status of the function
  IsNumber "${magnitude}"
}
# main program

# behaviour switches (changed by the command line options)
verbose=no
debug=no
run=yes
clear=no
IndexOnly=no

# directories
work=work
dest=image
temp=/tmp

# farm number; when left empty it is derived from the host name later
farm=

# threading: machine count and make -j value can be preset from the environment
MaximumThreads=64
machine_count=${MACHINE_COUNT:-9}
parallel_build=${PARALLEL_BUILD:-3}

# per item defaults
truncate='unlimited'
default_articles_per_block=1
default_block_size=131072

# use /usr/local/bin/getopt when it is executable, otherwise whatever
# getopt is found on the PATH
getopt=/usr/local/bin/getopt
if [ ! -x "${getopt}" ]
then
  getopt=getopt
fi

# parse the command line; stop if getopt reports a failure
if ! args=$(${getopt} \
  -o hvp:a:b:incrRw:d:t:f:p:j:m:D \
  --long=help,verbose,articles:,block-size:,index-only,no-run,clear,re-render,re-parse,work:,dest:,temp:,farm:,parallel:,machines:,debug \
  -- "$@")
then
  exit 1
fi

# replace the arguments with the parsed values
eval set -- "${args}"
# process the options as re-ordered by getopt; the loop ends at the '--'
# marker, leaving only the item arguments in the positional parameters
while :
do
  case "$1" in
    -v|--verbose)
      verbose=yes
      shift
      ;;

    -a|--articles)
      IsNumber "$2" || USAGE "not a number in: $1=$2"
      default_articles_per_block="$2"
      shift 2
      ;;

    -b|--block-size)
      # accepts <n>, <n>k (n * 1000) or <n>ki (n * 1024)
      default_block_size="$2"
      block_size_scale=
      case "${default_block_size}" in
        *ki)
          default_block_size="${default_block_size%ki}"
          block_size_scale=1024
          ;;
        *k)
          default_block_size="${default_block_size%k}"
          block_size_scale=1000
          ;;
        *)
          ;;
      esac
      # validate the digits BEFORE any arithmetic so that a malformed value
      # gives a usage error instead of a shell arithmetic error
      IsNumber "${default_block_size}" || USAGE "not a number in: $1=$2"
      if [ -n "${block_size_scale}" ]
      then
        default_block_size=$((${default_block_size} * ${block_size_scale}))
      fi
      shift 2
      ;;

    -i|--index-only)
      IndexOnly=yes
      clear=index
      shift
      ;;

    -n|--no-run)
      run=no
      shift
      ;;

    -c|--clear)
      clear=yes
      shift
      ;;

    -r|--re-render)
      clear=render
      shift
      ;;

    -R|--re-parse)
      clear=parse
      shift
      ;;

    -w|--work)
      work=$2
      shift 2
      ;;

    -d|--dest)
      dest=$2
      shift 2
      ;;

    -t|--temp)
      temp=$2
      shift 2
      ;;

    -f|--farm)
      IsNumber "$2" || USAGE "numeric argument required in: $1=$2"
      farm=$2
      shift 2
      ;;

    -j|--parallel)
      IsNumber "$2" || USAGE "numeric argument required in: $1=$2"
      parallel_build=$2
      shift 2
      ;;

    -m|--machines)
      IsNumber "$2" || USAGE "numeric argument required in: $1=$2"
      machine_count=$2
      shift 2
      ;;

    -D|--debug)
      debug=yes
      shift
      ;;

    --)
      shift
      break
      ;;

    -h|--help)
      USAGE
      ;;

    *)
      USAGE "invalid option: $1"
      ;;
  esac
done

# refuse to start if the total thread count over all machines is too high
if [ $((${machine_count} * ${parallel_build})) -gt "${MaximumThreads}" ]
then
  echo machine_count=${machine_count}
  echo parallel_build=${parallel_build}
  USAGE too many threads: "machine_count * parallel_build > ${MaximumThreads}"
fi
# process each item argument in turn:
#   <language>:<dir_suffix>:<file_prefix>:<links>:<count>:<size>:<parallel>:<truncate>
# "$@" keeps every argument intact (no re-splitting, no glob expansion)
for arg in "$@"
do
  arg="${arg}::::::::::::::" # lots of : to ensure empty arguments

  # peel the fields off the front one at a time, applying defaults
  language="${arg%%:*}"
  [ -z "${language}" ] && language=en

  arg="${arg#*:}"
  suffix="${arg%%:*}"
  [ -z "${suffix}" ] && suffix=pedia

  arg="${arg#*:}"
  FilePrefix="${arg%%:*}"
  [ -z "${FilePrefix}" ] && FilePrefix=wiki

  arg="${arg#*:}"
  enable_language_links="${arg%%:*}"
  case "${enable_language_links}" in
    ""|[yY]|[yY][eE][sS])
      enable_language_links=YES
      ;;
    *)
      enable_language_links=NO
      ;;
  esac

  arg="${arg#*:}"
  articles_per_block="${arg%%:*}"
  IsNumber "${articles_per_block}" || articles_per_block="${default_articles_per_block}"

  arg="${arg#*:}"
  article_block_size="${arg%%:*}"
  IsNumber "${article_block_size}" || article_block_size="${default_block_size}"

  # threads: a negative value is relative to the global --parallel value;
  # anything out of the range 1..parallel_build falls back to the global value
  arg="${arg#*:}"
  threads="${arg%%:*}"
  IsInteger "${threads}" || threads="${parallel_build}"
  [ "${threads}" -lt 0 ] && threads=$((${parallel_build} + ${threads}))
  if [ "${threads}" -gt "${parallel_build}" ] || [ "${threads}" -le 0 ]
  then
    threads="${parallel_build}"
  fi

  # truncate: only compare numerically once the value is known to be an
  # integer, so the 'unlimited' default never reaches the numeric test
  arg="${arg#*:}"
  truncate="${arg%%:*}"
  if ! IsInteger "${truncate}" || [ "${truncate}" -le 0 ]
  then
    truncate='unlimited'
  fi

  # license and terms
  licenses=$(readlink -m "${LicensesDirectory}")
  license="${licenses}/${language}/license.xml"
  terms="${licenses}/${language}/terms.xml"
  [ -f "${license}" ] || license="${licenses}/en/license.xml"
  [ -f "${terms}" ] || terms="${licenses}/en/terms.xml"

  # override license/terms if specific version is required
  [ -f "${licenses}/${language}/${suffix}-license.xml" ] && license="${licenses}/${language}/${suffix}-license.xml"
  [ -f "${licenses}/${language}/${suffix}-terms.xml" ] && terms="${licenses}/${language}/${suffix}-terms.xml"

  # create articles link
  if ! fn="$(TypeToFilename "${suffix}")"
  then
    # quoted: the [...] would otherwise be a glob bracket expression
    USAGE "invalid suffix: ${suffix}, [${ListOfAllContentTypes}]"
  fi

  articles_link="${language}${fn}"
  articles=$(readlink -m "${articles_link}")
  LogFile="${language}-${suffix}.log"

  [ -f "${articles}" ] || USAGE "error articles link: ${articles_link} not set correctly"

  # ordering of items is important
  xml="${license} ${terms} ${articles}"

  # extract numeric suffix from host name
  # expect that the rendering hosts are numbered from zero
  this_host=$(hostname --short)
  this_host_prefix=${this_host%%[0-9]}
  this_id=${this_host##*[!0-9]}
  [ -z "${this_id}" ] && this_id=0

  # a farm number given with --farm wins over the host name suffix; on later
  # iterations the existing "farm<n>" value is simply normalised again
  if [ -z "${farm}" ]
  then
    farm="farm${this_id}"
  else
    farm="farm${farm##*[a-z]}"
  fi

  # truncate the log file
  RUN -n rm -f "${LogFile}"
  RUN -n touch "${LogFile}"

  # update
  RUN git pull

  # set up the make options
  # (values are single quoted here; RUN resolves the quotes through eval)
  common_opts="DESTDIR='${dest}' WORKDIR='${work}' TEMPDIR='${temp}'"
  common_opts="${common_opts} WIKI_FILE_PREFIX='${FilePrefix}'"
  common_opts="${common_opts} WIKI_LANGUAGE='${language}'"
  common_opts="${common_opts} WIKI_DIR_SUFFIX='${suffix}'"
  common_opts="${common_opts} XML_FILES='${xml}'"
  common_opts="${common_opts} VERBOSE='${verbose}'"
  common_opts="${common_opts} MACHINE_COUNT='${machine_count}'"
  common_opts="${common_opts} PARALLEL_BUILD='${threads}'"
  common_opts="${common_opts} ENABLE_LANGUAGES_LINKS='${enable_language_links}'"
  common_opts="${common_opts} ARTICLES_PER_BLOCK='${articles_per_block}'"
  common_opts="${common_opts} ARTICLE_BLOCK_SIZE='${article_block_size}'"
  common_opts="${common_opts} MAX_ARTICLE_LENGTH='${truncate}'"

  # clean up
  # (common_opts is passed quoted: RUN joins its arguments into one string
  # before eval, so the quoting only prevents premature splitting/globbing)
  case "${clear}" in
    [yY]|[yY][eE][sS])
      RUN make clean index-clean "${farm}-clean" cleandirs "${common_opts}"
      ;;
    [iI][nN][dD][eE][xX])
      RUN make clean index-clean "${common_opts}"
      ;;
    [rR][eE][nN][dD][eE][rR])
      RUN make clean "${farm}-render-clean" "${common_opts}"
      RUN make "${farm}-parse-touch" "${common_opts}"
      ;;
    [pP][aA][rR][sS][eE])
      RUN make clean "${farm}-clean" "${common_opts}"
      ;;
    [nN]|[nN][oO])
      ;;
    *)
      ERROR "unknown clear action: ${clear}"
      ;;
  esac

  # create root directories and fonts
  RUN make createdirs "${common_opts}"
  RUN make fonts "${common_opts}"

  # run the build
  case "${run}" in
    [yY]|[yY][eE][sS])
      RUN make farm-index "${common_opts}"
      case "${IndexOnly}" in
        [yY]|[yY][eE][sS])
          ;;
        *)
          RUN make --jobs="${threads}" "${farm}" "${common_opts}"
          ;;
      esac
      ;;
    *)
      ;;
  esac

  RUN echo Processing Completed
done
Jump to Line
Something went wrong with that request. Please try again.