From 12c8a78c76e5fb68add9aa28e0085d7eee83b021 Mon Sep 17 00:00:00 2001
From: Yegor Bugayenko
Date: Mon, 2 Oct 2023 13:00:55 +0300
Subject: [PATCH] #85 join

---
Reviewer notes (text here is ignored by `git am`):

  * What the patch does: renames steps/aggregate-add.sh to
    steps/aggregate-join.sh, makes both aggregation steps write a CSV
    header row, escapes commas in the first CSV column as "\,", and adds
    tests for both steps.
  * aggregate-join.sh escapes EVERY comma of the repository name
    (sed 's/,/\\,/g'), the same way aggregate-repo.sh does with
    ${java//,/\\,}; without the "g" flag only the first comma is escaped.
    The "index" blob ids of that file were not recomputed for this edit.
  * NOTE(review): aggregate-join.sh removes "${join}" on every invocation,
    while aggregate.sh appears to schedule it once per repository; rows
    joined for earlier repositories would then be dropped -- confirm.
  * NOTE(review): the new lint glob is tests/*.sh, but the tests touched
    here live in tests/steps/ -- confirm they are actually linted.
  * NOTE(review): indentation and blank context lines inside the hunks
    were restored by hand from a whitespace-collapsed copy; run
    `git apply --check` before applying.

 steps/{aggregate-add.sh => aggregate-join.sh} |  9 ++++-
 steps/aggregate-repo.sh                       | 18 +++++++--
 steps/aggregate.sh                            |  6 +--
 steps/lint.sh                                 |  2 +-
 tests/steps/test-aggregate-join.sh            | 37 +++++++++++++++++++
 tests/steps/test-aggregate-repo.sh            | 12 ++++--
 6 files changed, 71 insertions(+), 13 deletions(-)
 rename steps/{aggregate-add.sh => aggregate-join.sh} (84%)
 create mode 100755 tests/steps/test-aggregate-join.sh

diff --git a/steps/aggregate-add.sh b/steps/aggregate-join.sh
similarity index 84%
rename from steps/aggregate-add.sh
rename to steps/aggregate-join.sh
index da37c986..3d31d2ff 100755
--- a/steps/aggregate-add.sh
+++ b/steps/aggregate-join.sh
@@ -34,8 +34,15 @@ csvs=$(find "${dir}" -name '*.csv' -maxdepth 1 -exec basename {} \;)
 total=$(echo "${csvs}" | wc -l | xargs)
 
 echo "${csvs}" | while IFS= read -r csv; do
+    join=${TARGET}/data/${csv}
+    rm -rf "${join}"
+    mkdir -p "$(dirname "${join}")"
     while IFS= read -r t; do
-        printf '%s,%s\n' "${reop}" "${t}" >> "${TARGET}/data/${csv}"
+        if [ ! -e "${join}" ]; then
+            printf 'repo,%s\n' "${t}" > "${join}"
+        else
+            printf '%s,%s\n' "$(echo "${repo}" | sed 's/,/\\,/g')" "${t}" >> "${join}"
+        fi
     done < "${dir}/${csv}"
 done
 
diff --git a/steps/aggregate-repo.sh b/steps/aggregate-repo.sh
index 7319491b..ba407b9d 100755
--- a/steps/aggregate-repo.sh
+++ b/steps/aggregate-repo.sh
@@ -45,15 +45,25 @@ find "${dir}" -name '*.m' | {
             metric=${v//${dir}${java}\.m\./}
             csv=${ddir}/${metric}.csv
             mkdir -p "$(dirname "${csv}")"
-            echo "${java},$(cat "${v}")" >> "${csv}"
+            if [ ! -e "${csv}" ]; then
+                printf 'java_file,%s\n' "${metric}" > "${csv}"
+            fi
+            printf '%s,%s\n' "${java//,/\\,}" "$(cat "${v}")" >> "${csv}"
         done
         csv=${ddir}/all.csv
         mkdir -p "$(dirname "${csv}")"
+        if [ ! -e "${csv}" ]; then
+            printf 'java_file' > "${csv}"
+            for a in ${all}; do
+                printf ",%s" "${a}" >> "${csv}"
+            done
+            printf '\n' >> "${csv}"
+        fi
         java=$(echo "${m}" | sed "s|${dir}||" | sed "s|\.m$||")
-        printf '%s' "${java}" >> "${csv}"
+        printf '%s' "${java//,/\\,}" >> "${csv}"
         for a in ${all}; do
             if [ -e "${m}.${a}" ]; then
-                value=$(cat "${m}.${a}" | "${LOCAL}/help/float.sh")
+                value=$("${LOCAL}/help/float.sh" < "${m}.${a}")
                 printf ",%s" "${value}" >> "${csv}"
                 if [ ! "${value}" = "NaN" ]; then
                     sum=$(echo "${sum} + ${value}" | bc | "${LOCAL}/help/float.sh")
@@ -62,7 +72,7 @@ find "${dir}" -name '*.m' | {
                 printf ',-' >> "${csv}"
             fi
         done
-        printf "\n" >> "${csv}"
+        printf '\n' >> "${csv}"
     done
     echo "${repo} (${pos}/${total}) aggregated (sum=${sum})$("${LOCAL}/help/tdiff.sh" "${start}")"
 }
diff --git a/steps/aggregate.sh b/steps/aggregate.sh
index 78da8cac..15fcaefa 100755
--- a/steps/aggregate.sh
+++ b/steps/aggregate.sh
@@ -26,7 +26,7 @@ set -o pipefail
 start=$(date +%s%N)
 
 all=$(find "${TARGET}/measurements" -name '*.m.*' -print | sed "s|^.*\.\(.*\)$|\1|" | sort | uniq | tr '\n' ' ')
-echo "All $(echo "${all}" | wc -w | xargs) metrics: ${all}"
+echo "All $(echo "${all}" | wc -w | xargs) metrics (in alphanumeric order): ${all}"
 
 repos=$(find "${TARGET}/measurements" -maxdepth 2 -mindepth 2 -type d -exec realpath --relative-to="${TARGET}/measurements" {} \;)
 total=$(echo "${repos}" | wc -l | xargs)
@@ -53,12 +53,12 @@ echo "${all}" | while IFS= read -r a; do
 done
 printf "\n" >> "${TARGET}/data/all.csv"
 
-jobs=${TARGET}/jobs/aggregate-add-jobs.txt
+jobs=${TARGET}/jobs/aggregate-join-jobs.txt
 rm -rf "${jobs}"
 mkdir -p "$(dirname "${jobs}")"
 touch "${jobs}"
 declare -i repo=0
-sh="$(dirname "$0")/aggregate-add.sh"
+sh="$(dirname "$0")/aggregate-join.sh"
 repos=$(find "${TARGET}/data" -maxdepth 2 -mindepth 2 -type d -print)
 echo "${repos}" | while IFS= read -r d; do
     r=$(realpath --relative-to="${TARGET}/data" "${d}" )
diff --git a/steps/lint.sh b/steps/lint.sh
index d283fa77..095d96b6 100755
--- a/steps/lint.sh
+++ b/steps/lint.sh
@@ -38,7 +38,7 @@ pylint "${LOCAL}/steps/"
 
 rubocop
 
-shellcheck -P "${LOCAL}"/metrics/*.sh -P "${LOCAL}"/filters/*.sh -P "${LOCAL}"/steps/*.sh
+shellcheck -P "${LOCAL}"/metrics/*.sh -P "${LOCAL}"/filters/*.sh -P "${LOCAL}"/steps/*.sh -P "${LOCAL}"/tests/*.sh
 
 mkdir -p "$(dirname "${flag}")"
 date +%s%N > "${flag}"
diff --git a/tests/steps/test-aggregate-join.sh b/tests/steps/test-aggregate-join.sh
new file mode 100755
index 00000000..22703265
--- /dev/null
+++ b/tests/steps/test-aggregate-join.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# The MIT License (MIT)
+#
+# Copyright (c) 2021-2023 Yegor Bugayenko
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+set -e
+set -o pipefail
+
+temp=$1
+
+repo="foo/bar,1"
+dir="${TARGET}/data/${repo}"
+mkdir -p "${dir}"
+echo -e "java_file,loc\nFoo.java,42\nBar.java,256" > "${dir}/loc.csv"
+msg=$("${LOCAL}/steps/aggregate-join.sh" "${repo}" "${dir}" 1 1)
+echo "${msg}" | (grep "sum=0" && exit 1 || true)
+test -e "${TARGET}/data/loc.csv"
+grep "repo,java_file,loc" "${TARGET}/data/loc.csv" > /dev/null
+grep "foo/bar\\\\,1,Foo.java,42" "${TARGET}/data/loc.csv" > /dev/null
+echo "👍🏻 A data joined correctly"
diff --git a/tests/steps/test-aggregate-repo.sh b/tests/steps/test-aggregate-repo.sh
index d33661c4..086503b5 100755
--- a/tests/steps/test-aggregate-repo.sh
+++ b/tests/steps/test-aggregate-repo.sh
@@ -28,13 +28,17 @@ temp=$1
 repo="foo/bar test ; "
 dir="${TARGET}/measurements/${repo}/a"
 mkdir -p "${dir}"
-touch "${dir}/Foo.java.m"
-echo ".75" > "${dir}/Foo.java.m.nhd"
-echo "42" > "${dir}/Foo.java.m.loc"
+touch "${dir}/Foo,Bar.java.m"
+echo ".75" > "${dir}/Foo,Bar.java.m.nhd"
+echo "42" > "${dir}/Foo,Bar.java.m.loc"
 msg=$("${LOCAL}/steps/aggregate-repo.sh" "${repo}" 1 1 'loc nhd')
 echo "${msg}" | (grep "sum=0" && exit 1 || true)
 test -e "${TARGET}/data/${repo}/all.csv"
+grep "/a/Foo\\\\,Bar.java,42,0.75" "${TARGET}/data/${repo}/all.csv" > /dev/null
+grep "java_file,loc,nhd" "${TARGET}/data/${repo}/all.csv" > /dev/null
 test -e "${TARGET}/data/${repo}/loc.csv"
+grep "java_file,loc" "${TARGET}/data/${repo}/loc.csv" > /dev/null
+grep "/a/Foo\\\\,Bar.java,42" "${TARGET}/data/${repo}/loc.csv" > /dev/null
 test -e "${TARGET}/data/${repo}/nhd.csv"
-cat "${TARGET}/data/${repo}/loc.csv" | grep ",42" >/dev/null
+grep ",42" "${TARGET}/data/${repo}/loc.csv" >/dev/null
 echo "👍🏻 A repo aggregated correctly"