# Introduction to Big Data Modern Technologies course

## TOPIC 3: Hadoop and MapReduce practice
### Intro and Jupyter hints

### 1. Basic commands and variables

#### 1.1. Unix terminal

In [None]:
!help

In [None]:
!pwd

In [None]:
!ls

In [None]:
!date

In [None]:
!touch demo.txt
!ls -la

In [None]:
!echo some text

In [None]:
!echo some text > demo.txt

In [None]:
!cat demo.txt

In [None]:
!echo some more text > demo.txt

In [None]:
!cat demo.txt

In [None]:
!echo some text > demo.txt
!echo some more text >> demo.txt

In [None]:
!cat demo.txt

#### 1.2. Unix terminal and notebook variables

##### Notebook variables

In [None]:
some_variable = 'pwd'

In [None]:
!echo some_variable

In [None]:
!echo {some_variable}

In [None]:
!{some_variable}

In [None]:
path = '/home/jovyan/ibdt_course_miba_24/topic_3'
file = 'demo.txt'
!cat {path + '/' + file}

##### Be careful: Python vs shell variables

In [None]:
!another_variable="hello"

In [None]:
!echo ${another_variable}

In [None]:
!another_variable="hello" 
!echo ${another_variable}

In [None]:
!another_variable="hello" && echo ${another_variable}

#### 1.3. Simple combinations with Python and Unix terminal

In [None]:
flag = False

In [None]:
if flag:
    !echo hello
else:
    !echo not hello

In [None]:
flag_install = False

In [None]:
if flag:
    # install Pandas and NumPy (because it is needed for Pandas)
    !pip install pandas
else:
    # only NumPy install
    !pip install numpy

#### 1.4. Jupyter's magic

The full list of Jupyter (IPython) magic commands is documented [here](https://ipython.readthedocs.io/en/stable/interactive/magics.html).

In [None]:
%magic

In [None]:
%lsmagic

In [None]:
%pwd

In [None]:
%ls

#### 1.5. Jupyter's cell magic (bash)

In [None]:
%%bash

pwd

In [None]:
%%bash

pwd
ls -la

In [None]:
%%bash

bash_variable="hello"
echo $bash_variable
echo ${bash_variable}

#### 1.6. Jupyter's cell magic (not only bash)

In [None]:
%%js
alert('I can run JavaScript. Ha-ha, you did not expect this message!');

### 2. Bash

##### Loops

In [None]:
%%bash

# iterate over a literal list of single-word items, printing one per line
for item in first second third fourth fifth; do
  echo ${item}
done

In [None]:
%%bash

# double quotes keep each multi-word item together as one loop value
for item in "the first" "the second" "the third"; do
  echo ${item} item
done

In [None]:
%%bash

# the loop value is expanded inside a double-quoted string
for item in "the first" "the second" "the third"; do
  echo "here comes ${item} item"
done

In [None]:
%%bash

file="demo.txt"
cat ${file}

In [None]:
%%bash

file="demo.txt"

# the unquoted $(...) output is split on whitespace,
# so the loop sees the file word by word
for word in $(cat ${file}); do
  echo ${word}
done

In [None]:
%%bash

# `IFS` is the variable the shell consults when splitting
# the unquoted $(...) output into items; setting it to a
# single newline makes the loop go line by line

file="demo.txt"
IFS=$'\n'
for line in $(cat $file); do
  echo ${line}
done

In [None]:
%%bash

# list every path matched by the glob; the directory is the same one
# the notebook's `path` variable points to (ibdt_course_miba_24)
for file in /home/jovyan/ibdt_course_miba_24/topic_3/*
  do
    echo $file
  done

##### If-else

In [None]:
%%bash

var=1

# -gt is a strict "greater than", so the else branch
# also covers the case var == 5
if [[ ${var} -gt 5 ]]
then
  echo "variable is > 5"
else
  echo "variable is <= 5"
fi

In [None]:
%%bash

# classify each path matched by /home/jovyan/* :
# -d tests for a directory, -f for a regular file
for file in /home/jovyan/*; do
  if [ -d "$file" ]; then
    echo "$file is a directory"
  elif [ -f "$file" ]; then
    echo "$file is a file"
  fi
done

##### File operations

In [None]:
%%bash

cp demo.txt one_more_demo.txt
ls -la
echo "**************************************"
cat one_more_demo.txt

In [None]:
%%bash

# -p: do not fail if the directory is still there from an earlier run
mkdir -p useless_directory
ls -la

echo "**************************************"

# move the file into the directory, then show the directory's content
mv one_more_demo.txt useless_directory/
ls -la useless_directory/

echo "**************************************"

cat useless_directory/one_more_demo.txt

In [None]:
# use `rm -rf` with care!!!

!rm -rf useless_directory

##### Run Python script

In [None]:
!which python

In [None]:
!echo "x = 'I am a Python script and I am running'" > demo.py
!echo "print(x)" >> demo.py
!cat demo.py

In [None]:
!python demo.py

### 3. Pipes

Here is a [good article](https://medium.com/linuxstories/bash-pipes-and-redirections-4c267c13643b) on pipes and redirections; for this course we only need the `pipe` concept.

In [None]:
!wc -l *

In [None]:
!wc demo.txt

In [None]:
!wc -w demo.txt

In [None]:
!wc -l demo.txt

In [None]:
!wc -l < demo.txt

In [None]:
!cat demo.txt

In [None]:
!cat demo.txt | wc -l

In [None]:
!cat demo.txt | grep more

In [None]:
!cat demo.txt | grep text

In [None]:
!ls -la

In [None]:
!ls -la | grep txt

In [None]:
!ls -la | wc -l

In [None]:
!ls -la | grep txt | wc -l

In [None]:
!ls | grep txt | sort

In [None]:
!ls | grep txt | sort -r

### 4. Yield

#### Example 1

In [None]:
def odd_number(numbers):
    """Generate the odd values of ``numbers`` one at a time, in input order.

    Being a generator function, it does no work until the result is iterated.
    """
    for candidate in numbers:
        if candidate % 2 == 0:
            continue  # even value: nothing to emit
        yield candidate

In [None]:
numbers = [0, 1, 2, 3, 4, 5, 6, 7]

In [None]:
print('odd numbers:')
for n in odd_number(numbers):
    print(n, end=' ')

#### Example 2

In [None]:
def cube_generator():
    """Yield the cubes of 1, 2, 3, ... without ever finishing."""
    base = 0
    while True:
        base += 1
        # The generator pauses at `yield`; the next `next()` call resumes
        # right after it, i.e. with the following loop iteration.
        yield base ** 3

In [None]:
cug = cube_generator()

In [None]:
cug

In [None]:
next(cug)

In [None]:
# The generator never ends by itself, so count the items (starting at 1)
# and leave the loop once ten cubes have been printed.
for counter, n in enumerate(cube_generator(), start=1):
    if counter > 10:
        break
    print(n)

#### Example 3

In [None]:
def want_more(text):
    """Print the words of ``text`` and yield ``True`` at every word 'more'.

    ``text`` is split on whitespace. Each word is printed as it is reached;
    the generator pauses (yielding ``True``) right after printing a 'more'.
    """
    for token in text.split():
        print(token)
        if token == 'more':
            yield True

In [None]:
text = 'some text some more text and text'

In [None]:
wm = want_more(text)

In [None]:
next(wm)

In [None]:
# `text` contains only one 'more', so (with the cells run in order) this
# second call prints the remaining words and then raises StopIteration
next(wm)

In [None]:
try:
    next(wm)
except StopIteration:
    # an exhausted generator signals its end by raising StopIteration
    print('no more left to do')

__MOTIVATION:__ It is recommended to use `yield` when we want to iterate over a sequence but do not want to keep the whole sequence in memory (e.g. because of resource constraints).