Skip to content

Commit

Permalink
add header analysis scripts to the utils directory
Browse files Browse the repository at this point in the history
Maybe someone will find them useful...
  • Loading branch information
cbeck88 committed Jul 7, 2014
1 parent 0ebaae5 commit d7ea8f9
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 0 deletions.
117 changes: 117 additions & 0 deletions utils/headers/build_headers.sh
@@ -0,0 +1,117 @@
#!/bin/bash

# Prints the help text for build_headers.sh to stdout and terminates the
# script.
# Arguments: $1 - optional exit status; defaults to 1 so that a bare `usage`
#                 call keeps reporting failure. Pass 0 when help was
#                 explicitly requested.
# Outputs:   the usage text on stdout
# Returns:   never returns; exits with the chosen status
usage()
{
  local status="${1:-1}"

  printf 'Usage: %s [OPTIONS]\n' "$0"
  cat <<'EOF'

Computes all the (deep) header dependencies for each file (compilation unit) in
the wesnoth project.

The calculated dependency lists are placed out of tree, in a subdirectory
'headers' of the root of the repository, in order that they may be conveniently
grepped or similar.

A ranking of most commonly used headers is generated, in header_rank.log, based
on the number of compilation units which use the header.

The tool expects the current working directory to be the root directory of the
repository.

EOF
  # The option table is tab-aligned, so it is emitted with printf escapes
  # rather than literal tabs inside the heredoc.
  printf 'Options:\n'
  printf '\t-h\tShows this help.\n'
  printf '\t-s\tShow source dependencies.\n'
  printf '\t-b\tShow boost dependencies.\n'
  printf '\t-i\tShow all /usr/include dependencies.\n'
  printf '\t-y\tShow all /usr/bin (system) dependencies.\n'
  printf '\n'
  printf '\tBy default *all* dependencies are shown.\n'
  printf '\tIf multiple flags are passed, the OR of these is shown.\n'
  printf '\n'
  printf '\t-m arg\tUse a custom pattern. Pass a regexp as an argument to\n'
  printf '\t\tmatch against the paths of included files.\n'
  printf "\t\tCan't use this with other options.\n"
  printf '\n'
  printf '\n'
  printf 'Example Usage:\n'
  printf '\n'
  printf '\t./build_headers.sh -s\n'
  printf '\n'
  exit "$status"
}

echo "Reading options..."

# dir_pattern accumulates the regular expression used later to filter the
# header paths; it stays empty when no filter option is given, in which case
# every path matches.
dir_pattern=""
# Predefined alternatives. Slashes are escaped because the pattern is later
# spliced into a sed address delimited by '/'.
src_pattern="\(src\/\)"
boost_pattern="\(\/usr\/include\/boost\/\)"
incl_pattern="\(\/usr\/include\/\)"
bin_pattern="\(\/usr\/bin\/\)"

# Appends one alternative to dir_pattern, inserting the "\|" alternation
# operator when the pattern already holds something.
# Arguments: $1 - regexp fragment to add
# Globals:   dir_pattern (written)
add_pattern()
{
  if [[ -n "$dir_pattern" ]]; then
    dir_pattern+="\|"
  fi
  dir_pattern+="$1"
}

# Parses the command-line flags and builds dir_pattern from them.
# Arguments: the script's arguments ("$@")
# Globals:   dir_pattern (written), OPTIND (advanced by getopts)
# Returns:   exits 0 after showing help for -h, exits 1 on an unknown option
#            or on a missing option argument
parse_options()
{
  local opt
  # The leading ':' selects getopts' silent mode, so unknown options and
  # missing arguments must be reported by the '\?' and ':' arms below.
  while getopts ":hsbiym:" opt; do
    case "$opt" in
      h)
        usage 0
        # Fallback in case usage returns or ignores its argument.
        exit 0
        ;;
      s)
        echo "Adding source includes..."
        add_pattern "$src_pattern"
        ;;
      b)
        echo "Adding boost includes..."
        add_pattern "$boost_pattern"
        ;;
      i)
        echo "Adding /usr/include includes..."
        add_pattern "$incl_pattern"
        ;;
      y)
        echo "Adding bin includes..."
        add_pattern "$bin_pattern"
        ;;
      m)
        echo "Matching against pattern:"
        dir_pattern="$OPTARG"
        echo "$dir_pattern"
        ;;
      :)
        echo "$0: option -$OPTARG requires an argument (use -h for help)" >&2
        exit 1
        ;;
      \?)
        echo "$0: unknown option -$OPTARG (use -h for help)" >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
shift $((OPTIND - 1))

echo "Final pattern:" "$dir_pattern"

# Include search path handed to clang++. These are hard-coded system
# locations; adjust them if the libraries live elsewhere on your machine.
INCLUDE_STR="-Isrc -I/usr/include/SDL -I/usr/include -I/usr/include/pango-1.0 -I/usr/include/cairo -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -I/usr/include/pixman-1 -I/usr/include/freetype2 -I/usr/include/libpng12 -I/usr/include/dbus-1.0 -I/usr/lib/x86_64-linux-gnu/dbus-1.0/include -I/usr/include/fribidi"
# Split the flag string once into an array so it can be expanded safely.
read -r -a include_flags <<< "$INCLUDE_STR"

echo "Building header include database in wesnoth/headers/..."
if [[ ! -d src ]]; then
  echo "$0: no src/ directory found; run this from the root of the repository" >&2
  exit 1
fi
mkdir -p headers
pwd

# For every compilation unit under src/, write the list of headers it pulls
# in (directly or indirectly) to headers/<same relative path>.
while IFS= read -r -d '' file; do
  out="headers/${file#src/}"
  mkdir -p "${out%/*}"
  echo "$file"
  # clang's -H prints one line per included header on stderr, prefixed with
  # dots that encode the nesting depth; stdout is discarded.
  # -fsyntax-only skips code generation and linking, which are not needed
  # to obtain the include listing.
  #   1st sed: keep only the "<dots> <path>" lines and strip the dot prefix.
  #   2nd sed: repeatedly collapse "/dir/../" into "/" to normalise paths.
  #            The '-' is placed last in the bracket expression so that it
  #            is a literal rather than the start of a range.
  #   3rd sed: keep the paths that start with the selected pattern. The
  #            pattern is spliced into a '/'-delimited address, so any
  #            slash in a custom -m pattern must be escaped as '\/'.
  clang++ -H -fsyntax-only "${include_flags[@]}" "$file" 2>&1 >/dev/null \
    | sed -n 's/^\.\.* //p' \
    | sed -e ':loop' -e 's|/[[:alnum:]_.-]*/\.\./|/|g' -e 't loop' \
    | sed -n '/^'"$dir_pattern"'/ p' \
    | sort -u > "$out"
done < <(find src -name "*.cpp" -type f -print0)

echo "ranking headers"
# Count in how many per-file listings each header appears, most used first.
find headers/ -type f -exec cat {} + \
  | sort \
  | uniq -c \
  | sort -k1 --numeric --reverse > "header_rank.log"
echo "wrote to header_rank.log"
echo "Finished."
53 changes: 53 additions & 0 deletions utils/headers/header_times.sh
@@ -0,0 +1,53 @@
#!/bin/bash
# Compute header times. Takes an scons build log with debug=time on, file name
# as first and only arg.
#
# Reads:  headers/            per-compilation-unit header listings
#         <scons-log-file>    build log containing the per-command timings
# Writes: headers-annotated/  copy of headers/ with the build time of the
#                             owning compilation unit appended to every line
#         header_time_rank.log  headers ranked by summed build time
set -e

if [[ "$#" -ne 1 ]]; then
  echo "Usage: $0  [scons-log-file]"
  echo
  echo "Ranks headers according to the aggregate build time of compilation units which"
  echo "read them. In other words, the ranking answers the question 'if I stopped a"
  echo "wesnoth build at a random point in time, which headers are most likely to have"
  echo "been read by that compilation unit?'"
  echo
  echo "Expects to take the name of a log file from an scons build (with debug=time"
  echo "option passed in) as first and only arg. This file must be located at the root"
  echo "of the wesnoth repository directory, and the argument should just be its name"
  echo "and extension."
  echo
  echo "Expects the current working directory to be the root directory of the repo."
  echo
  echo "Example Usage:"
  echo
  echo -e "\t./build_headers.sh -s"
  echo -e "\t./header_times.sh travis_log_sample.log"
  echo
  exit 1
fi

log_file="$1"
if [[ ! -f "$log_file" ]]; then
  echo "$0: log file '$log_file' not found in the current directory" >&2
  exit 1
fi
if [[ ! -d headers ]]; then
  echo "$0: no headers/ directory found; run build_headers.sh first" >&2
  exit 1
fi

# -f: the directory does not exist on a first run, and under `set -e` a
# failing rm would abort the whole script.
rm -rf headers-annotated
cp -fR headers headers-annotated

# Collect the file list up front: the loop below rewrites and deletes files
# inside the tree that find is walking.
annotated_files=()
while IFS= read -r -d '' path; do
  annotated_files+=("$path")
done < <(find headers-annotated -name "*.cpp" -type f -print0)

for file in "${annotated_files[@]}"; do
  rel="${file#headers-annotated/}"
  echo "src/$rel"
  # Escape regex metacharacters so the file name is matched literally.
  rel_re=$(printf '%s' "$rel" | sed 's/[][\.*^$]/\\&/g')
  if grep -q "src/${rel_re}$" "$log_file"; then
    # The timing is taken from the line that follows the command line ending
    # in " src/<file>"; the number is the token surrounded by spaces. Only
    # the first match is used, because several matches would put a newline
    # into the sed replacement below and break it.
    header_time=$(sed -n '\| src/'"$rel_re"'$| { N; s|.*\n||p; }' "$log_file" \
      | sed -n 's/.*\( [0-9\.]* \).*/\1/p' \
      | head -n 1)
    # Append the time as a second column to every header line of this unit.
    sed -i 's/^.*$/& '"$header_time"'/' "$file"
  else
    # Compilation unit does not appear in the log: drop its listing.
    rm "$file"
  fi
done

echo "Summing results..."
# Sum the second column (time) per header (first column), largest total first.
find headers-annotated/ -name "*.cpp" -type f -exec cat {} + \
  | awk '{
arr[$1]+=$2
}
END {
for (key in arr) printf("%s\t%s\n", arr[key], key )
}' \
  | sort -k1 --numeric --reverse > "header_time_rank.log"
echo "wrote to header_time_rank.log"
echo "Finished."
less header_time_rank.log
15 changes: 15 additions & 0 deletions utils/headers/readme
@@ -0,0 +1,15 @@
This directory contains tools to help analyze header dependencies.

The first script is "build_headers.sh". It runs clang with the -H
option over the entire source directory to generate header
dependencies and, for each source file, writes an out-of-tree file
listing all headers it directly or indirectly includes. You may run
it with various filters to control the level of detail.

"build_headers.sh" also generates a sorted "rank" file which
indicates how many compilation units include each header.

The second script is "header_times.sh". It takes a compilation log
generated by scons with the debug=time option (such as those found
on travis), and builds a second ranking in which compilation units
are weighted by the total time that they take to build.

0 comments on commit d7ea8f9

Please sign in to comment.