# Numpy
## Computation on NumPy Arrays: Universal Function

In [2]:
import numpy as np

In [14]:
x = np.arange(-2, 3)

In [15]:
x

array([-2, -1,  0,  1,  2])

Basic operation

In [16]:
# Addition
x + 5

array([3, 4, 5, 6, 7])

In [17]:
# Subtraction
x - 5

array([-7, -6, -5, -4, -3])

In [18]:
# Multiply
x * 5

array([-10,  -5,   0,   5,  10])

In [19]:
# Division
x / 5

array([-0.4, -0.2,  0. ,  0.2,  0.4])

In [20]:
# Power
x**5

array([-32,  -1,   0,   1,  32], dtype=int32)

In [21]:
# Modulus
x % 2

array([0, 1, 0, 1, 0], dtype=int32)

Numpy Arithmatic Operation

In [22]:
# Addition
np.add(x, 2)

array([0, 1, 2, 3, 4])

In [23]:
# Subtraction
np.subtract(x, 2)

array([-4, -3, -2, -1,  0])

In [24]:
# Negative
np.negative(x)

array([ 2,  1,  0, -1, -2])

In [25]:
# Multiply
np.multiply(x, 2)

array([-4, -2,  0,  2,  4])

In [26]:
# Divide
np.divide(x, 2)

array([-1. , -0.5,  0. ,  0.5,  1. ])

In [27]:
# Floor Divide
np.floor_divide(x, 2)

array([-1, -1,  0,  0,  1], dtype=int32)

In [30]:
# Power
np.power(x, 2)

array([4, 1, 0, 1, 4], dtype=int32)

In [29]:
# Moddulus
np.mod(x, 2)

array([0, 1, 0, 1, 0], dtype=int32)

In [32]:
# Absolute
np.abs(x)
np.absolute(x)

array([2, 1, 0, 1, 2])

---

## Aggregations: Min, Max and Other

Sum

In [33]:
L = np.random.random(100)

In [38]:
# Python Build-in Function
sum(L)

49.92715169944605

In [35]:
# Numpy Function
np.sum(L)

49.927151699446064

In [37]:
%timeit sum(L)
%timeit np.sum(L)

6.11 µs ± 62.3 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
2.42 µs ± 20.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [41]:
big_array = np.random.random(100000)

In [42]:
%timeit sum(big_array)
%timeit np.sum(big_array)

4.82 ms ± 23.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
26.6 µs ± 251 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


Max and Min

In [49]:
min(big_array)

4.5701354900451463e-07

In [46]:
max(big_array)

0.9951142397104967

In [47]:
np.min(big_array)

4.5701354900451463e-07

In [48]:
np.max(big_array)

0.9999920753402348

In [50]:
%timeit min(big_array)
%timeit np.min(big_array)

3.02 ms ± 61.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
26.3 µs ± 249 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [51]:
%timeit max(big_array)
%timeit np.max(big_array)

2.95 ms ± 15.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
26.3 µs ± 279 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


---

## Multi-dimensional Aggregates


* Aggregation against rows and columns

In [52]:
M = np.random.random((3, 4))

In [53]:
M

array([[0.00378908, 0.70798192, 0.9274944 , 0.7944253 ],
       [0.45748884, 0.29499342, 0.81559107, 0.06099879],
       [0.49106566, 0.04772911, 0.25274386, 0.44366158]])

In [54]:
sum(M)

array([0.95234358, 1.05070444, 1.99582933, 1.29908567])

In [55]:
M.sum()

5.297963025359619

In [57]:
#  Min nos. column wise
M.min(axis=0)

array([0.00378908, 0.04772911, 0.25274386, 0.06099879])

In [59]:
#  Min nos. row wise
M.min(axis=1)

array([0.00378908, 0.06099879, 0.04772911])

---

## Broadcasting

In [61]:
a = np.array([1 ,2, 3])
b = np.array([4, 5, 6])

In [62]:
a + b

array([5, 7, 9])

In [63]:
# Addition of scalar value
a + 5

array([6, 7, 8])

In [65]:
# Boardcast Single Array
M = np.ones((3, 3))

In [66]:
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [67]:
a

array([1, 2, 3])

In [68]:
M.shape

(3, 3)

In [69]:
a.shape

(3,)

In [70]:
M + a

array([[2., 3., 4.],
       [2., 3., 4.],
       [2., 3., 4.]])

In [73]:
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]

In [75]:
print(a.shape)
print(b.shape)

(3,)
(3, 1)


In [76]:
print(a)
print(b)

[0 1 2]
[[0]
 [1]
 [2]]


In [77]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

Boradcasting of imcopatible array

In [79]:
a = np.ones((3, 2))
b = np.arange(3)

In [80]:
print(a.shape)
print(b.shape)

(3, 2)
(3,)


In [81]:
print(a)
print(b)

[[1. 1.]
 [1. 1.]
 [1. 1.]]
[0 1 2]


In [82]:
a + b

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

In [88]:
a + b[:, np.newaxis]

array([[1., 1.],
       [2., 2.],
       [3., 3.]])

---

## Comparisons, Masks, and Boolean Logic

- Comparison Operation
1. Less than `<`
2. Greater than `>`
3. Less than and equal `<=`
4. Greater than and equal `>=`
6. Not equal `!=`
7. Equal `==`

In [91]:
x = np.arange(1, 7)

In [92]:
x

array([1, 2, 3, 4, 5, 6])

In [98]:
print(f'Python In-Build: {x < 3}')
print(f'Numpy In-Build: {np.less(x, 3)}')

Python In-Build: [ True  True False False False False]
Numpy In-Build: [ True  True False False False False]


In [100]:
%timeit x < 3
%timeit np.less(x, 3)

753 ns ± 15.4 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
730 ns ± 11 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [99]:
print(f'Python In-Build: {x > 3}')
print(f'Numpy In-Build: {np.greater(x, 3)}')

Python In-Build: [False False False  True  True  True]
Numpy In-Build: [False False False  True  True  True]


In [101]:
print(f'Python In-Build: {x <= 3}')
print(f'Numpy In-Build: {np.less_equal(x, 3)}')

Python In-Build: [ True  True  True False False False]
Numpy In-Build: [ True  True  True False False False]


In [102]:
print(f'Python In-Build: {x >= 3}')
print(f'Numpy In-Build: {np.greater_equal(x, 3)}')

Python In-Build: [False False  True  True  True  True]
Numpy In-Build: [False False  True  True  True  True]


- wo-dimensional Array

In [103]:
rng = np.random.RandomState(0)
x = rng.randint(10, size=(3, 4))

In [104]:
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [105]:
x < 6

array([[ True,  True,  True,  True],
       [False, False,  True,  True],
       [ True,  True, False, False]])

Count `True` value

In [106]:
np.count_nonzero(x < 6)

8

In [107]:
np.sum(x < 6)

8

In [108]:
# Values in row less than 6
np.sum(x < 6, axis=1)

array([4, 2, 2])

In [109]:
# Values in column less than 6
np.sum(x < 6, axis=0)

array([2, 2, 2, 2])

In [110]:
np.any(x > 8)

True

In [111]:
np.any(x < 0)

False

In [112]:
np.all(x < 8, axis=1)

array([ True, False,  True])

## Example: Seattle Rainfall

In [4]:
import pandas as pd

In [5]:
df = pd.read_csv(r'seattle-weather.csv', encoding='utf8', engine='python')

In [117]:
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle
1,2012/1/2,10.9,10.6,2.8,4.5,rain
2,2012/1/3,0.8,11.7,7.2,2.3,rain
3,2012/1/4,20.3,12.2,5.6,4.7,rain
4,2012/1/5,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015/12/27,8.6,4.4,1.7,2.9,fog
1457,2015/12/28,1.5,5.0,1.7,1.3,fog
1458,2015/12/29,0.0,7.2,0.6,2.6,fog
1459,2015/12/30,0.0,5.6,-1.0,3.4,sun


In [118]:
df['precipitation']

0        0.0
1       10.9
2        0.8
3       20.3
4        1.3
        ... 
1456     8.6
1457     1.5
1458     0.0
1459     0.0
1460     0.0
Name: precipitation, Length: 1461, dtype: float64

In [119]:
# Days without rain
np.sum(df['precipitation'] == 0)

838

In [121]:
# Numbers of days with rain
np.sum(df['precipitation'] != 0)

623

## Boolean Operator
- bitwise and `&`
- bitwise or `|`
- bitwise not `~`

In [124]:
# All rainy days will less than 15mm and greater than 0mm from
np.sum((df['precipitation'] < 15 ) & (df['precipitation'] > 0))

533

In [127]:
rain = df['precipitation'] > 0

In [128]:
df['precipitation'][rain]

1       10.9
2        0.8
3       20.3
4        1.3
5        2.5
        ... 
1452     6.1
1453     2.5
1454     5.8
1456     8.6
1457     1.5
Name: precipitation, Length: 623, dtype: float64

In [142]:
x

array([[5, 0, 3, 3],
       [7, 9, 3, 5],
       [2, 4, 7, 6]])

In [145]:
x < 5

array([[False,  True,  True,  True],
       [False, False,  True, False],
       [ True,  True, False, False]])

In [146]:
x[x < 5]

array([0, 3, 3, 3, 2, 4])

In [8]:
df['year'] = pd.DatetimeIndex(df['date']).year

In [7]:
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle,2012
1,2012/1/2,10.9,10.6,2.8,4.5,rain,2012
2,2012/1/3,0.8,11.7,7.2,2.3,rain,2012
3,2012/1/4,20.3,12.2,5.6,4.7,rain,2012
4,2012/1/5,1.3,8.9,2.8,6.1,rain,2012
...,...,...,...,...,...,...,...
1456,2015/12/27,8.6,4.4,1.7,2.9,fog,2015
1457,2015/12/28,1.5,5.0,1.7,1.3,fog,2015
1458,2015/12/29,0.0,7.2,0.6,2.6,fog,2015
1459,2015/12/30,0.0,5.6,-1.0,3.4,sun,2015


In [9]:
df2012 = df[df['year'] == 2012]
df2012

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather,year
0,2012/1/1,0.0,12.8,5.0,4.7,drizzle,2012
1,2012/1/2,10.9,10.6,2.8,4.5,rain,2012
2,2012/1/3,0.8,11.7,7.2,2.3,rain,2012
3,2012/1/4,20.3,12.2,5.6,4.7,rain,2012
4,2012/1/5,1.3,8.9,2.8,6.1,rain,2012
...,...,...,...,...,...,...,...
361,2012/12/27,4.1,7.8,3.3,3.2,rain,2012
362,2012/12/28,0.0,8.3,3.9,1.7,rain,2012
363,2012/12/29,1.5,5.0,3.3,1.7,rain,2012
364,2012/12/30,0.0,4.4,0.0,1.8,drizzle,2012
