## Notes from [Python Data Analytics](http://www.amazon.com/Python-Data-Analytics-Fabio-Nelli/dp/1484209591/ref=sr_1_1?ie=UTF8&qid=1450048533&sr=8-1&keywords=Python+Data+Analytics)

### 1. Python Functions
* map(function, list)
* filter(function, list)
* reduce(function, list)
* lambda
* list comprehension
* [other built-in functions](https://docs.python.org/2/library/functions.html#reduce)


In [8]:
# Demonstrates map / filter / reduce with a named function and with lambdas.
# print(x) with a single argument behaves identically on Python 2 and 3.
from functools import reduce  # built-in on Python 2, only in functools on Python 3
items = [1, 2, 3, 4, 5]
def inc(x):
    """Return x incremented by one."""
    return x + 1
print(list(map(inc, items)))                 # map with a named function
print(list(map(lambda x: x + 1, items)))     # map with a lambda
print(list(filter(lambda x: x < 4, items)))  # filter keeps the items matching the predicate
print(reduce(lambda x, y: x + y, items))     # reduce folds the list into a single value

[2, 3, 4, 5, 6]
[2, 3, 4, 5, 6]
[1, 2, 3]
15


Pip commands:
* pip install package_name
* pip search package_name
* pip show package_name
* pip uninstall package_name

### 2. NumPy

In [14]:
# Basic ndarray attributes and constructors.
# print(x) with a single argument behaves identically on Python 2 and 3.
import numpy as np
a = np.array([1, 2, 3])
print(a.ndim)   # number of dimensions
print(a.size)   # total number of elements
print(a.shape)  # shape of the array
print(np.zeros((3, 3)))  # 3x3 array of zeros
print(np.ones((3, 3)))   # 3x3 array of ones

1
3
(3,)
[[ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]
[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]


In [15]:
np.arange(0, 10)     # like range(0, 10), but returns a NumPy array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [16]:
np.arange(0, 12, 3)  # step of 3; the stop value 12 is NOT included

array([0, 3, 6, 9])

In [17]:
np.arange(0, 12, 3).reshape(2, 2)   # reshape the 4 elements into a 2x2 array

array([[0, 3],
       [6, 9]])

In [18]:
np.linspace(0, 10, 5)    # 5 evenly spaced values; the endpoint 10 IS included

array([  0. ,   2.5,   5. ,   7.5,  10. ])

In [19]:
np.random.random((3, 3))  # 3x3 array of random floats between 0 and 1 (differs on every run)

array([[ 0.0226314 ,  0.05591402,  0.30557851],
       [ 0.61526516,  0.06592523,  0.94704285],
       [ 0.98933642,  0.74626897,  0.19137706]])

In [23]:
# Element-wise product vs. matrix product of two 3x3 arrays.
# print(x) with a single argument behaves identically on Python 2 and 3.
A = np.arange(0, 9).reshape(3, 3)
B = np.ones((3, 3))
print(A)
print(B)
print(A * B)         # element-wise multiplication
print(np.dot(A, B))  # matrix multiplication

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]]
[[ 0.  1.  2.]
 [ 3.  4.  5.]
 [ 6.  7.  8.]]
[[  3.   3.   3.]
 [ 12.  12.  12.]
 [ 21.  21.  21.]]


In [25]:
# Indexing: build a small 1-D array to slice in the following cells.
# print(x) with a single argument behaves identically on Python 2 and 3.
a = np.arange(10, 16)
print(a)

[10 11 12 13 14 15]


In [29]:
# Slices use start:stop:step; print(x) with a single argument works on Python 2 and 3.
print(a[1:5:2])   # from index 1 up to (but excluding) index 5, every 2nd element
print(a[:5:2])    # start omitted: begins at index 0
print(a[:5:])     # step omitted: every element up to (but excluding) index 5

[11 13]
[10 12 14]
[10 11 12 13 14]


In [33]:
# Indexing a 2-D array by row, column, slice, and list of row indices.
# print(x) with a single argument behaves identically on Python 2 and 3.
A = np.arange(10, 19).reshape(3, 3)
print(A)
print(A[0, :])          # first row
print(A[:, 0])          # first column
print(A[0:2, 0:2])      # top-left 2x2 sub-array
print(A[[0, 2], 0:2])   # rows 0 and 2, first two columns

[[10 11 12]
 [13 14 15]
 [16 17 18]]
[10 11 12]
[10 13 16]
[[10 11]
 [13 14]]
[[10 11]
 [16 17]]


In [34]:
A.mean(axis=0)   # mean (likewise std, sum, etc.) along an axis; axis=0 gives one value per column

array([ 13.,  14.,  15.])

In [36]:
np.apply_along_axis(np.mean, axis=0, arr=A)   # same result as above; np.mean can be swapped for another function

array([ 13.,  14.,  15.])

In [37]:
A[A < 13]         # boolean-mask selection: keeps the elements smaller than 13

array([10, 11, 12])

In [38]:
A.reshape(1, 9)   # reshape into a single row of 9 elements (still 2-D)

array([[10, 11, 12, 13, 14, 15, 16, 17, 18]])

In [39]:
A.ravel()        # flatten the array into one dimension

array([10, 11, 12, 13, 14, 15, 16, 17, 18])

In [40]:
A.transpose()    # swap rows and columns

array([[10, 13, 16],
       [11, 14, 17],
       [12, 15, 18]])

In [42]:
# Combine two arrays.
# print(x) with a single argument behaves identically on Python 2 and 3.
A = np.ones((3, 3))
B = np.zeros((3, 3))
print(np.vstack((A, B)))   # stack vertically: B's rows go below A's
print(np.hstack((A, B)))   # stack horizontally: B's columns go to the right of A's

[[ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 1.  1.  1.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]
[[ 1.  1.  1.  0.  0.  0.]
 [ 1.  1.  1.  0.  0.  0.]
 [ 1.  1.  1.  0.  0.  0.]]


In [44]:
# Combine multiple 1-D arrays.
# print(x) with a single argument behaves identically on Python 2 and 3.
a = np.array([0, 1, 2])
b = np.array([3, 4, 5])
c = np.array([6, 7, 8])
print(np.column_stack((a, b, c)))   # each input array becomes a column
# np.vstack gives the same result as np.row_stack, which is a deprecated alias in NumPy 2.0
print(np.vstack((a, b, c)))         # each input array becomes a row

[[0 3 6]
 [1 4 7]
 [2 5 8]]
[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [46]:
# Split a 4x4 array into two equal halves, first by columns and then by rows.
# print(x) with a single argument behaves identically on Python 2 and 3.
A = np.arange(16).reshape((4, 4))
print(A)
B, C = np.hsplit(A, 2)   # left and right halves (2 columns each)
print(B)
print(C)
B, C = np.vsplit(A, 2)   # top and bottom halves (2 rows each)
print(B)
print(C)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]
[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [None]:
# A more complex way of splitting: cut at explicit indices along an axis.
# print(x) with a single argument behaves identically on Python 2 and 3.
A1, A2, A3 = np.split(A, [1, 3], axis=1)  # 3 parts by columns: 0:1, 1:3, 3:end
print(A1)
print(A2)
print(A3)

More on array splitting can be found [here](http://docs.scipy.org/doc/numpy/reference/generated/numpy.split.html).

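Two arrays can be broadcast together when all their dimensions are compatible, i.e., for each dimension the lengths of the two arrays must be equal, or one of them must be equal to 1. Dimensions are compared starting from the last one, and a missing leading dimension is treated as having length 1, which is why a (4, 4) array can be added to a (4,) array below.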

In [47]:
# Broadcasting: the 1-D array b is added to every row of the 2-D array A.
# print(x) with a single argument behaves identically on Python 2 and 3.
A = np.arange(16).reshape(4, 4)
b = np.arange(4)
print(A)
print(b)
print(A + b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[0 1 2 3]
[[ 0  2  4  6]
 [ 4  6  8 10]
 [ 8 10 12 14]
 [12 14 16 18]]


In [None]:
# save and load data
# NOTE(review): `data` is not defined in the cells shown here; presumably any
# array created earlier. Confirm before running.
np.save('saved_data', data)   # written with no extension; read back below as 'saved_data.npy'
np.load('saved_data.npy')
# read data in a text file
# NOTE(review): names=True presumably takes the column names from the file's
# header row; confirm against the genfromtxt documentation.
np.genfromtxt('data.csv', delimiter=',', names=True)

In [None]:
# Ch4.