From d7ea8f96563dbd7d007342a528a856b14e220635 Mon Sep 17 00:00:00 2001
From: Chris Beck
Date: Sun, 6 Jul 2014 22:56:03 -0400
Subject: [PATCH] add header analysis scripts to the utils directory

Maybe someone will find them useful...
---
 utils/headers/build_headers.sh | 144 +++++++++++++++++++++++++++++++++
 utils/headers/header_times.sh  |  77 +++++++++++++++
 utils/headers/readme           |  15 +++++
 3 files changed, 236 insertions(+)
 create mode 100755 utils/headers/build_headers.sh
 create mode 100755 utils/headers/header_times.sh
 create mode 100644 utils/headers/readme

diff --git a/utils/headers/build_headers.sh b/utils/headers/build_headers.sh
new file mode 100755
index 000000000000..c4d7946b2756
--- /dev/null
+++ b/utils/headers/build_headers.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+#
+# Computes the deep header dependencies of every compilation unit under src/
+# by running "clang++ -H" on it, stores one sorted listing per unit under
+# headers/, and ranks the headers by how many units pull them in
+# (header_rank.log). Run it from the root of the repository.
+
+# Prints the help text and exits.
+# Arguments: $1 - exit status (optional, defaults to 1).
+usage()
+{
+  echo "Usage:" "$0" "[OPTIONS]"
+  echo
+  echo "Computes all the (deep) header dependencies for each file (compilation unit) in"
+  echo "the wesnoth project."
+  echo
+  echo "The calculated dependency lists are placed out of tree, in a subdirectory"
+  echo "'headers' of the root of the repository, in order that they may be conveniently"
+  echo "grepped or similar."
+  echo
+  echo "A ranking of most commonly used headers is generated, in header_rank.log, based"
+  echo "on the number of compilation units which use the header."
+  echo
+  echo "The tool expects the current working directory to be the root directory of the"
+  echo "repository."
+  echo
+  echo -e "Options:"
+  echo -e "\t-h\tShows this help."
+  echo -e "\t-s\tShow source dependencies."
+  echo -e "\t-b\tShow boost dependencies."
+  echo -e "\t-i\tShow all /usr/include dependencies."
+  echo -e "\t-y\tShow all /usr/bin (system) dependencies."
+  echo
+  echo -e "\tBy default *all* dependencies are shown."
+  echo -e "\tIf multiple flags are passed, the OR of these is shown."
+  echo
+  echo -e "\t-m arg\tUse a custom pattern. Pass a regexp as an argument to"
+  echo -e "\t\tmatch against the paths of included files."
+  echo -e "\t\tCan't use this with other options."
+  echo
+  echo
+  echo "Example Usage:"
+  echo
+  echo -e "\t./build_headers.sh -s"
+  echo
+  exit "${1:-1}"
+}
+
+# Appends one alternative to the sed (BRE) pattern held in dir_pattern.
+# Globals:   dir_pattern (read and written)
+# Arguments: $1 - BRE fragment to add
+add_pattern()
+{
+  if [ -n "$dir_pattern" ]; then
+    dir_pattern+="\|"
+  fi
+  dir_pattern+="$1"
+}
+
+echo "Reading options..."
+dir_pattern=""
+src_pattern="\(src\/\)"
+boost_pattern="\(\/usr\/include\/boost\/\)"
+incl_pattern="\(\/usr\/include\/\)"
+bin_pattern="\(\/usr\/bin\/\)"
+
+while getopts ":hsbiym:" Option
+do
+  case $Option in
+    h )
+      usage 0
+      ;;
+    s )
+      echo "Adding source includes..."
+      add_pattern "$src_pattern"
+      ;;
+    b )
+      echo "Adding boost includes..."
+      add_pattern "$boost_pattern"
+      ;;
+    i )
+      echo "Adding /usr/include includes..."
+      add_pattern "$incl_pattern"
+      ;;
+    y )
+      echo "Adding bin includes..."
+      add_pattern "$bin_pattern"
+      ;;
+    m )
+      echo "Matching against pattern:"
+      dir_pattern="$OPTARG"
+      echo "$dir_pattern"
+      ;;
+    : )
+      echo "Option -$OPTARG requires an argument (see -h)." >&2
+      exit 1
+      ;;
+    \? )
+      echo "Unknown option: -$OPTARG (see -h)." >&2
+      exit 1
+      ;;
+  esac
+done
+shift $((OPTIND - 1))
+
+echo "Final pattern:" "$dir_pattern"
+
+# Anchor the whole alternation, not just its first branch: without the group,
+# "^a\|b" would match "b" anywhere in a path.
+if [ -n "$dir_pattern" ]; then
+  line_filter="^\($dir_pattern\)"
+else
+  line_filter="^"
+fi
+
+if [ ! -d src ]; then
+  echo "No src/ directory here; run this from the root of the repository." >&2
+  exit 1
+fi
+
+INCLUDE_STR="-Isrc -I/usr/include/SDL -I/usr/include -I/usr/include/pango-1.0 -I/usr/include/cairo -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -I/usr/include/pixman-1 -I/usr/include/freetype2 -I/usr/include/libpng12 -I/usr/include/dbus-1.0 -I/usr/lib/x86_64-linux-gnu/dbus-1.0/include -I/usr/include/fribidi"
+
+echo "Building header include database in wesnoth/headers/..."
+mkdir -p headers
+pwd
+# NUL-delimited so that paths containing whitespace survive intact.
+find src -name "*.cpp" -type f -print0 | while IFS= read -r -d '' file; do
+  out="headers/${file#src/}"
+  mkdir -p "$(dirname "$out")"
+  echo "$file"
+  # Only the compiler's stderr is piped on. Lines that start with a run of
+  # dots and a space are kept, the dot prefix is dropped, "dir/../" segments
+  # are folded until none remain, and the directory filter is applied last.
+  # $INCLUDE_STR is left unquoted on purpose: it must split into arguments.
+  clang++ -H $INCLUDE_STR "$file" 2>&1 >/dev/null </dev/null \
+    | sed -n '/^\.*\. / p' \
+    | sed -e 's/^\.* //g' -e ':loop' -e 's|/[[:alnum:]_.-]*/\.\./|/|g' -e 't loop' \
+    | sed -n "/$line_filter/ p" \
+    | sort -u >"$out"
+done
+echo "ranking headers"
+find headers/ -type f -exec cat {} + | sort | uniq -c | sort -k1 --numeric --reverse > "header_rank.log"
+echo "wrote to header_rank.log"
+echo "Finished."
diff --git a/utils/headers/header_times.sh b/utils/headers/header_times.sh
new file mode 100755
index 000000000000..d8c50ea9eae9
--- /dev/null
+++ b/utils/headers/header_times.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Compute header times. Takes an scons build log with debug=time on, file name
+# as first and only arg. Every header line in the listings under headers/ is
+# annotated with the time found in the log for its compilation unit, and the
+# per-header sums are written to header_time_rank.log.
+set -e
+
+if [[ "$#" -ne 1 ]]; then
+  echo "Usage:" "$0" " [scons-log-file]"
+  echo
+  echo "Ranks headers according to the aggregate build time of compilation units which"
+  echo "read them. In other words, the ranking answers the question 'if I stopped a"
+  echo "wesnoth build at a random point in time, which headers are most likely to have"
+  echo "been read by that compilation unit?'"
+  echo
+  echo "Expects to take the name of a log file from an scons build (with debug=time"
+  echo "option passed in) as first and only arg. This file must be located at the root"
+  echo "of the wesnoth repository directory, and the argument should just be its name"
+  echo "and extension."
+  echo
+  echo "Expects the current working directory to be the root directory of the repo."
+  echo
+  echo "Example Usage:"
+  echo
+  echo -e "\t./build_headers.sh -s"
+  echo -e "\t./header_times.sh travis_log_sample.log"
+  echo
+  exit 1;
+fi
+
+if [[ ! -f "$1" ]]; then
+  echo "Log file not found: $1" >&2
+  exit 1
+fi
+if [[ ! -d headers ]]; then
+  echo "No headers/ directory here; run build_headers.sh first." >&2
+  exit 1
+fi
+
+# -f: the directory does not exist on a first run, which must not trip "set -e".
+rm -rf headers-annotated
+cp -fR headers headers-annotated
+cd headers-annotated/
+# Collect the listing names first (NUL-delimited, so odd file names survive);
+# the loop below rewrites and deletes files in the tree being searched.
+units=()
+while IFS= read -r -d '' file; do
+  units+=("$file")
+done < <(find . -name "*.cpp" -type f -print0)
+for file in "${units[@]}"; do
+  unit="${file#./}"
+  echo "src/$unit"
+  if grep -q "src/${unit}\$" "../$1"; then
+    # The value is a space-delimited number taken from the log line that
+    # follows the one ending in the unit's path. Only the first hit is used,
+    # so a unit that appears twice cannot produce a multi-line value.
+    header_time=$(sed -n '\| src/'"$unit"'$| { N; s|.*\n||p; }' "../$1" \
+      | sed -n 's/.*\( [0-9\.]* \).*/\1/p' | head -n 1)
+    # Append the value to every line of the unit's listing.
+    sed -i 's/^.*$/& '"$header_time"'/' "$unit"
+  else
+    # The log has no entry for this unit: keep it out of the sums.
+    rm "$unit"
+  fi
+done
+cd ..
+echo "Summing results..."
+find headers-annotated/ -name "*.cpp" -type f -exec cat {} + | sort -s -g -k 1,1 | awk '{
+    arr[$1]+=$2
+  }
+  END {
+    for (key in arr) printf("%s\t%s\n", arr[key], key )
+  }' \
+  | sort -k1 --numeric --reverse > "header_time_rank.log"
+echo "wrote to header_time_rank.log"
+echo "Finished."
+less header_time_rank.log
diff --git a/utils/headers/readme b/utils/headers/readme
new file mode 100644
index 000000000000..196290fa5ebf
--- /dev/null
+++ b/utils/headers/readme
@@ -0,0 +1,15 @@
+This directory contains tools to help analyze header dependencies.
+
+The first script is "build_headers". It runs clang with -H option,
+to generate header dependencies, over the entire source directory,
+and builds out of tree copies of all files with a listing of all
+headers they directly or indirectly include. You may run it with
+various filters for level of detail.
+
+Build headers will generate a sorted "rank" file which indicates
+how many compilation units include each header.
+
+The second script is "header_times". It takes a compilation log,
+generated by scons with debug=time option (such as found on
+travis), and builds a second ranking in which compilation units
+are weighted by the total time that they take.