Motivation and summary
* Syntax to access ndarray (n-dim array):  nd1 = df.values
* df gives more methods
* addressing: nd[row, column], 0 indexed
* nd[0:3, 1:3] - uses slices. last value is one past what you want
* nd[:,1:3] - colon indicates 'all'
* nd[-2:, :] - last two rows, all columns (note: nd[-1:-2, :] would be an empty slice)
* nd[-3:, :] - last three rows, all columns

In [1]:
import numpy as np

- numpy is a wrapper for arithmetic libraries while pandas is a wrapper for numpy.
- numpy array is represented as arr[rows,columns].
- row and column indices both start from 0 (0 for the first row, 0 for the first column).
- arr[0:3, 1:3] selects rows 0 through 2 (the first three rows; the stop index 3 is excluded) and columns 1 and 2 (the second and third columns, stopping just before the fourth column).
- arr[:,3] selects all rows of the column at index 3 only (the fourth column, since indexing starts at 0).
- arr[-1, 1:3] indicates last row and select columns starting from second to columns just before the fourth column. 
- arr[-2,1:3] indicates the second-to-last row only (use arr[-2:,1:3] for the last 2 rows) and selects columns starting from the second up to just before the fourth column.
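- arr.sum(axis=0) - sums down each column, giving one total per column.
- arr.sum(axis=1) - sums across each row, giving one total per row.
- arr.argmax() - returns the index of the maximum value of an array (use arr.max() for the value itself).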
- arr[:,n:m:t] indicates all rows and specifies range that starts at n and stops just before m, in increments of t.
- arr[:,3] = [1,2,3,4,5] - every row of the column at index 3 is assigned the corresponding value from the list. The number of elements in the list must equal the number of rows.
- indices = np.array([1,1,2,3]) and arr[indices] - will return the values at the corresponding indices of the array arr.
- arr < arr.mean() gives a boolean array marking the values less than the mean of the array; arr[arr<arr.mean()] uses it as a mask and returns only those values.

In [2]:
# one dimensional array - can pass list or tuple
# Both calls below build the same 1D array.

print np.array([2,3,4])
print np.array((2,3,4))

[2 3 4]
[2 3 4]


In [3]:
# 2D array - note the use of tuples. list of lists also works
# Each inner sequence becomes one row, so both calls give a 2x3 array.

print np.array([(2,3,4), (5,6,7)])
print np.array([[2,3,4], [5,6,7]])

[[2 3 4]
 [5 6 7]]
[[2 3 4]
 [5 6 7]]


In [4]:
# print empty array
# np.empty allocates the array without initialising it, so the printed
# values are arbitrary leftovers from memory and are not guaranteed to
# be zeros.

print np.empty(5) # 1D empty array
print np.empty((5,5)) # 2D empty array. note tuple

[ -1.49166815e-154  -1.49166815e-154  -1.49166815e-154  -1.49166815e-154
   2.27634690e-314]
[[ -1.49166815e-154  -1.29074108e-231   1.33397724e-322   0.00000000e+000
    0.00000000e+000]
 [  0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000]
 [  0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000]
 [  3.04763097e-047  -1.49166815e-154  -1.49166815e-154   9.38724727e-323
    0.00000000e+000]
 [  0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000]]


In [5]:
# array of ones

# np.ones takes the shape as a tuple; the default data type is float.
# A single-argument, parenthesised print behaves the same on Python 2
# and Python 3.
print(np.ones((3,4)))

# Request integers with the builtin `int`: the `np.int` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
print(np.ones((3,4), dtype=int))

[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]
[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [6]:
# array of zeros
# The shape is passed as a tuple; the elements print as floats (0.).

print np.zeros((3,2))

[[ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]]


In [7]:
# Random values from [0.0, 1.0)
# The two calls differ only in how the shape is passed.

print np.random.random((5,4)) # shape given as a single tuple

print np.random.rand(5,4)  # rand doesn't require a tuple

[[ 0.06531845  0.17775963  0.37950709  0.48012686]
 [ 0.22297962  0.47914366  0.15477302  0.39926873]
 [ 0.79750043  0.9124455   0.81617678  0.97298675]
 [ 0.62660162  0.90897593  0.76247059  0.48186172]
 [ 0.68951619  0.49132777  0.61910407  0.91064385]]
[[ 0.49810953  0.45276565  0.69993874  0.7483208 ]
 [ 0.03177126  0.71305821  0.59738901  0.84252462]
 [ 0.41806687  0.48729968  0.9594253   0.73853706]
 [ 0.48190406  0.86163011  0.54101885  0.01652832]
 [ 0.44425984  0.19145369  0.76210047  0.22960039]]


In [8]:
# Sample numbers from a Gaussian (normal) distribution
# The positional arguments are the mean and the standard deviation;
# `size` gives the shape of the returned array.

print np.random.normal(size = (2,3)) # standard normal

print np.random.normal(50,10, size = (2,3)) # mean 50, sd 10

[[-0.98352994 -0.27961055 -0.07417865]
 [-0.00640134  0.2940844  -1.85257166]]
[[ 46.90191452  45.97588532  46.42122416]
 [ 50.42740941  52.93003555  37.44184799]]


In [9]:
# Random integers
# The upper bound is exclusive in every form below; with a single
# argument the lower bound is 0.

print np.random.randint(10) # a single integer in [0, 10)
print np.random.randint(0, 10) # a random integer [low, high)
print np.random.randint(0, 10, size = 5) # 5 random integers as a 1D array
print np.random.randint(0,10, size = (2,3)) # Random integers between [0, 10) as a 2x3 array

4
6
[2 0 8 0 8]
[[3 3 0]
 [1 1 1]]


In [10]:
# Numpy array attributes

# A 5x4 array of random floats to inspect.
a = np.random.random((5,4))
# print a
print a.shape  # attribute shape gives dimensions of array, as a (rows, columns) tuple
print a.shape[0] # number of rows
print a.shape[1] # number of columns
print len(a.shape) # gives number of dimensions (one shape entry per dimension)
print a.size # gives number of elements (rows * columns)
print a.dtype # gives the data type of the elements

(5, 4)
5
4
2
20
float64


In [11]:
# Operations on arrays

# Seed the generator so the same 5x4 integer array is produced on
# every run.
np.random.seed(693)
a = np.random.randint(0,10, size = (5, 4))
print a

# Sum of all elements
print 'Sum of all elements:', a.sum()

# Sum by axis. Think of this as which dimension
# is being collapsed, or which dimension you're
# iterating over: axis=0 collapses the rows and leaves
# one total per column; axis=1 collapses the columns and
# leaves one total per row.
print 'Sum of each column:', a.sum(axis=0)
print 'Sum of each row:', a.sum(axis=1)

# Stats: min, max, mean (rows, cols, overall)
# The axis argument works the same way as for sum().
print 'Min of columns:', a.min(axis=0)
print 'Max of rows:', a.max(axis=1)
print 'Mean of all elements:', a.mean()

[[2 0 5 1]
 [1 3 4 4]
 [9 2 9 1]
 [9 3 7 5]
 [4 7 0 3]]
Sum of all elements: 79
Sum of each column: [25 15 25 14]
Sum of each row: [ 8 12 21 24 14]
Min of columns: [1 0 0 1]
Max of rows: [5 4 9 9 7]
Mean of all elements: 3.95


In [12]:
# Locate the maximum value

def get_max_index(a):
    '''Return the position of the largest element of a 1D array.

    Delegates to the array's own argmax(); when the maximum occurs
    more than once, the first occurrence is reported.
    '''
    position = a.argmax()
    return position

# Small 1D example: the largest value, 14, sits at index 5.
a = np.array([9,6,2,3,12,14,7,10], dtype=np.int32)
print "Maximum value:", a.max()
print "Index of max:", get_max_index(a)

# Tougher for multi-dimensional arrays: argmax() then returns an index
# into the flattened array. np.unravel_index(a.argmax(), a.shape)
# converts that flat index back into a (row, column, ...) tuple.

Maximum value: 14
Index of max: 5


In [13]:
# Utilizing the time function

from time import time

def how_long(func, *args):
    '''Call func(*args) and time the call.

    Returns a (result, elapsed) tuple, where elapsed is the difference
    between the time() readings taken just before and just after the
    call.
    '''
    started = time()
    outcome = func(*args)
    elapsed = time() - started
    return outcome, elapsed
    
def manual_mean(arr):
    '''Compute the mean of all elements in a 2D array with explicit loops.

    Deliberately visits the array element by element so the result can
    be timed against arr.mean().

    The running total starts as a float, so integer arrays are not
    floor-divided under Python 2. range() is used instead of xrange()
    so the function runs on both Python 2 and Python 3, and the
    accumulator no longer shadows the builtin sum().

    Raises ZeroDivisionError if arr has no elements.
    '''
    total = 0.0
    for i in range(arr.shape[0]):
        for j in range(arr.shape[1]):
            total += arr[i, j]
    return total / arr.size

def numpy_mean(arr):
    '''Compute the mean of all elements of arr using the array's own mean().'''
    average = arr.mean()
    return average

# Benchmark both implementations on the same 1000 x 10000 array of
# random floats.
nd1 = np.random.random((1000, 10000))

# how_long returns (result, elapsed time); the computed means
# (res_manual, res_numpy) are not compared with each other here.
res_manual, t_manual = how_long(manual_mean, nd1)
res_numpy, t_numpy = how_long(numpy_mean, nd1)
# Ratio of the two elapsed times.
# NOTE(review): this divides by t_numpy, which would fail if the timer
# reported zero elapsed time for the numpy call.
speedup = t_manual / t_numpy

print 'Numpy is ', speedup, ' times faster than manual'

Numpy is  246.351592622  times faster than manual


In [14]:
# Accessing array elements

a = np.random.rand(5,4)

# Access element at position (3, 2) # remember 0 indexing!
print a[3,2]

# Elements in defined range
print a[0, 1:3] # first row, columns 1 and 2
print a[0:2, 0:2] # first two rows, first two columns
print a[:, 0:3:2] # all rows, columns 0 and 2: from 0 up to (not including) 3 in steps of 2
print a[:, [0,2]] # same thing with a list

# assign values to specific elements

a[0,0] = 1 # replace one element

a[0, :] = 2 # replace the entire first row

# replace the column at index 3 (the last of the 4 columns);
# the list supplies one value per row (5 rows)
a[:, 3] = [1, 2, 3, 4, 5]

print a

0.262247693848
[ 0.19443186  0.15294266]
[[ 0.76484993  0.19443186]
 [ 0.7152339   0.68874783]]
[[ 0.76484993  0.15294266]
 [ 0.7152339   0.55787223]
 [ 0.7906318   0.31082212]
 [ 0.38303999  0.26224769]
 [ 0.36169014  0.03944179]]
[[ 0.76484993  0.15294266]
 [ 0.7152339   0.55787223]
 [ 0.7906318   0.31082212]
 [ 0.38303999  0.26224769]
 [ 0.36169014  0.03944179]]
[[ 2.          2.          2.          1.        ]
 [ 0.7152339   0.68874783  0.55787223  2.        ]
 [ 0.7906318   0.70489628  0.31082212  3.        ]
 [ 0.38303999  0.21038991  0.26224769  4.        ]
 [ 0.36169014  0.65767671  0.03944179  5.        ]]


In [15]:
# Indexing an array with another array

a = np.random.rand(5)
print a

# Indices may repeat; the result has one element per index, in the
# order the indices are given.
indices = np.array([1,1,2,3])

print a[indices]

[ 0.91277862  0.19925213  0.82660771  0.13349003  0.28854294]
[ 0.19925213  0.19925213  0.82660771  0.13349003]


In [16]:
# Boolean or Mask index arrays

a = np.array([(20, 25, 10, 23, 26, 32, 10, 5, 0),
              (0, 2, 50, 20, 0, 1, 28, 5, 0)])
print a

mean = a.mean()
print mean

# Masking
# `a < mean` is a boolean array of the same shape as a; indexing with it
# returns the selected elements as a 1D array.
print a[a < mean]

# Replacement
# a holds integers, so the float mean is truncated when it is stored:
# the replaced elements print as 14, not 14.27...

a[a < mean] = mean
print a

[[20 25 10 23 26 32 10  5  0]
 [ 0  2 50 20  0  1 28  5  0]]
14.2777777778
[10 10  5  0  0  2  0  1  5  0]
[[20 25 14 23 26 32 14 14 14]
 [14 14 50 20 14 14 28 14 14]]


In [17]:
# Arithmetic operations (1.3.21)

# always applied element-wise

a = np.array([(1, 2, 3, 4, 5),
              (10, 20, 30, 40, 50)])

print a

# A scalar is applied to every element.
print 2 * a

# b has the same 2x5 shape as a, so elements are paired by position.
b = np.array([(100, 200, 300, 400, 500),
              (1, 2, 3, 4, 5)])

print a + b

print a * b # element-wise multiplication

[[ 1  2  3  4  5]
 [10 20 30 40 50]]
[[  2   4   6   8  10]
 [ 20  40  60  80 100]]
[[101 202 303 404 505]
 [ 11  22  33  44  55]]
[[ 100  400  900 1600 2500]
 [  10   40   90  160  250]]
