# Terminal Assessment
The files included are as follow
```yaml
main.sh: answers to the questions
trycatch.sh: try/catch util; source is in the file
histogram.sh: includes histogram func and dependencies to draw a histogram
bridge_len.sh: executable, which provides total length given a type of bridge
```

> In this notebook, the scripts (ie `main.sh & histogram.sh`) have been split
> accordingly to improve readability.
> 
> Although, due to multiple cells we need to declare
> vars multiple times

In [1]:
%%bash

echo BashVersion: $BASH_VERSION
echo $(sw_vers)

BashVersion: 3.2.57(1)-release
ProductName: Mac OS X ProductVersion: 10.15.4 BuildVersion: 19E287


### File Download

In [2]:
%%bash

source trycatch.sh

link="http://archive.ics.uci.edu/ml/machine-learning-databases/bridges/bridges.data.version1"
file=$(echo $link | tr '/' '\n' | tail -1)  # original file name

# download file if needed
# shellcheck disable=SC1009
if [ ! -f "$file" ]; then
  try {
    set +e
    curl --url "$link" --output "$file";
    echo "File '$file' downloaded successfully!";
  } catch {
    echo "File '$file' failed to download";
  }
else
  echo "File '$file' already exists";
fi


File 'bridges.data.version1' already exists


trycatch.sh: line 6: declare: -g: invalid option
declare: usage: declare [-afFirtx] [-p] [name[=value] ...]


### Add Column Names

In [3]:
%%bash

cols="IDENTIF,RIVER,LOCATION,ERECTED,PURPOSE,LENGTH,LANES,CLEAR-G,T-OR-D,MATERIAL,SPAN,REL-L,TYPE"
file="bridges.data.version1"  # this var already exists

# add column names if needed
line="$(head -1 $file)"
if [[ ! $line == $cols ]]; then

  echo "adding column names to $file...";
  sed -i "" -e $'1 i\\\n'"$cols" $file;  # this may not work in other operating systems

else
  echo "column names are already added to $file";
fi

printf "\n\nFILE HEADER:\n\n"

sed -n 1,3p $file

column names are already added to bridges.data.version1


FILE HEADER:

IDENTIF,RIVER,LOCATION,ERECTED,PURPOSE,LENGTH,LANES,CLEAR-G,T-OR-D,MATERIAL,SPAN,REL-L,TYPE
E1,M,3,1818,HIGHWAY,?,2,N,THROUGH,WOOD,SHORT,S,WOOD
E2,A,25,1819,HIGHWAY,1037,2,N,THROUGH,WOOD,SHORT,S,WOOD


### Generate Histogram for "PURPOSE" & "LENGTH"

#### Create histogram function for multiple cases
While "length" is numeric (int) and needs to be grouped within intervals, "purpose" is categorical and we cannot use bins.

*Note: Bash and `histogram.sh` do not support floats. In this case it will not show the data within intervals. Sometimes the last interval exceeds the max array value, but this does not affect the results*

In [4]:
%%bash
#!/usr/bin/env bash

# shellcheck disable=SC2120
function intervals() {
  local Bin=$1
  local Vec=("${@:2}")
  local uni=($(echo "${Vec[@]}" | tr " " "\n" | sort -u)) # sort expects new lines
  checker=true                                            # checker if Vec[@] are int

  # if bins int & gt 0
  if [ "$Bin" -gt 1 ] && [ "$Bin" -lt ${#Vec[@]} ]; then
    for u in "${uni[@]}"; do
      # if any observation not int
      if ! [ "$u" -eq "$u" ]; then
        checker=false
        break
      fi
    done
  else
    checker=false
  fi

  if $checker; then
    local M=${Vec[0]}
    local m=${Vec[0]}

    for n in "${Vec[@]}"; do
      ((n > M)) && M=$n
      ((n < m)) && m=$n
    done
    
    local size=$(((M - m) / Bin))
    # round bins accordingly
    if (((M-m)%bin >= (bin+1)/2)); then
        local size=$((size+1))
    fi

    for ((i = 0; i <= Bin; i++)); do
      i_array+=($((m + i * size)))
    done
  else
    i_array=("${uni[@]}")
  fi
}

# example
bin=5
arr=(10 9 8 7 6 5 4 3 2 1 0)
intervals "$bin" "${arr[@]}"

echo "init: ${arr[@]}"
echo "ints? $checker"
echo "bins: ${i_array[@]}"


init: 10 9 8 7 6 5 4 3 2 1 0
ints? true
bins: 0 2 4 6 8 10


#### Histogram function
It calls `intervals` and uses globals `i_array & checker` to determine the intervals and behaviour

In [5]:
%%bash
#!/usr/bin/env bash

function histogram() {
  local Bin=$2
  local Vec=("${@:3}")
  intervals "$Bin" "${Vec[@]}"
  declare -a hist

  for n in "${Vec[@]}"; do
    declare -i i=0
    if $checker; then
      while (($n >= ${i_array[i]})); do
        i+=1
      done
    else
      while [[ $n != "${i_array[i]}" ]]; do
        i+=1
      done
    fi
    hist[i]+="="
  done

  echo; echo "TITLE: $1"
  for i in "${!hist[@]}"; do
    if $checker; then
      echo "${i_array[i - 1]}"-"${i_array[i]}": ${hist[i]}
    else
      echo "${i_array[i]}": "${hist[i]}"
    fi
  done
}


#### Get Column indices and names in an array

In [6]:
%%bash

cols="IDENTIF,RIVER,LOCATION,ERECTED,PURPOSE,LENGTH,LANES,CLEAR-G,T-OR-D,MATERIAL,SPAN,REL-L,TYPE"
c_arr=($(echo $cols | tr ',' ' '))

arr=("PURPOSE" "LENGTH")

# $i+1 is used because arrays start by 0
# but `cut` starts by 1
for col in "${arr[@]}"; do
  for i in ${!c_arr[@]}; do
    if [ "${c_arr[i]}" == "$col" ]; then
      idx[$((i+1))]=$col
      echo "$col @ $((i+1))"
      break
    fi
  done
done


PURPOSE @ 5
LENGTH @ 6


#### Nan_remover func & run histograms
*Warning: if all vals are not `int` but works as expected* 

*Note: Each "=" is an occurence*

In [7]:
%%bash

source histogram.sh

file="bridges.data.version1"
declare -a idx=([5]="PURPOSE" [6]="LENGTH")  # this has gotten above

# gets the array & the val to be removed ($1)
function nan_remover() {
  local Vec=("${@:2}")
  local Nan=$1

  for val in "${Vec[@]}"; do
    # shellcheck disable=SC2053
    if [[ $val != $Nan ]]; then
      new_arr+=("$val")
    fi
  done
  echo "${new_arr[@]}"
}

bin=10
for i in "${!idx[@]}"; do
  arr=($(cat $file | cut -d ',' -f"$i" | tail -n +2))  # start @ line 2
  arr=($(nan_remover ? "${arr[@]}"))
  histogram "${idx[i]}" "$bin" "${arr[@]}"  # args: title bin_num array
done


TITLE: PURPOSE
AQUEDUCT: ====
WALK: =

TITLE: LENGTH
2308-2684: =====
2684-3060: ==
3436-3812: =
3812-4188: =
4188-4564: =


histogram.sh: line 14: [: AQUEDUCT: integer expression expected


### Count # of Bridges with length over 1000

> ***Notice***
> 
> In this case, the result can be found with `histogram` func by using `bins=(max-min)/(1000-min)`.
> 
> However, this methodology cannot be used in every possible case since `histogram` does not support
> floats.

In [8]:
%%bash

file="bridges.data.version1"

function nan_remover() {
  local Vec=("${@:2}")
  local Nan=$1

  for val in "${Vec[@]}"; do
    # shellcheck disable=SC2053
    if [[ $val != $Nan ]]; then
      new_arr+=("$val")
    fi
  done
  echo "${new_arr[@]}"
}


arr=($(cat $file | cut -d ',' -f6 | tail -n +2))
arr=($(nan_remover ? "${arr[@]}"))
lim=1000

declare -i total=0
for i in "${arr[@]}"; do
  if (( $i > $lim )); then
    (( total++ ))
  fi
done

echo ">$lim Vals: $total"
echo "Total Vals: ${#arr[@]}";


>1000 Vals: 59
Total Vals: 81


### Replace `?` with `0` in Terminal

In [9]:
!tr ? 0 < bridges.data.version1 > bridges.data.new && echo success

success


### Executable which takes `TYPE` and returns total `LENGTH` of `TYPE`

#### bridge_len script

In [10]:
%%bash
#!/usr/bin/env bash
# bridge_len.sh

my_var=$1
file="bridges.data.new"

length=($(cat $file | cut -d ',' -f6 | tail -n +2))
type=($(cat $file | cut -d ',' -f13 | tail -n +2))

declare -i counter=0
for i in "${!type[@]}"; do
  if [[ "${type[i]}" == "$my_var" ]]; then
    counter=$(( counter+length[i] ))
  fi
done

if ((${#my_var})); then
    echo "TOTAL LENGTH OF $my_var BRIDGES: $counter";
else
    echo "PLEASE INPUT 'TYPE' OF BRIDGE. Available types:"
    echo "${type[@]}" | tr " " "\n" | sort -u | tr "\n" " "
fi


PLEASE INPUT 'TYPE' OF BRIDGE. Available types:
0 ARCH CANTILEV CONT-T NIL SIMPLE-T SUSPEN WOOD 

#### make it executable & run it

In [11]:
!chmod +x bridge_len.sh && ls -l bridge_len.sh && echo;
!./bridge_len.sh WOOD

-rwxr-xr-x  1 vbardakos  staff  386 Dec 14 13:23 [31mbridge_len.sh[m[m

TOTAL LENGTH OF WOOD BRIDGES: 9519
