Motivation and summary
* Syntax to access ndarray (n-dim array):  nd1 = df.values
* df gives more methods
* addressing: nd[row, column], 0 indexed
* nd[0:3, 1:3] - uses slices. last value is one past what you want
* nd[:,1:3] - colon indicates 'all'
* nd[-2:, :] - last two rows, all columns
* nd[-3:, :] - last three rows, all columns

In [1]:
import numpy as np

In [6]:
# One-dimensional array: np.array accepts either a list or a tuple and
# builds the same ndarray from both (the two printed results are identical).

print np.array([2,3,4])
print np.array((2,3,4))

[2 3 4]
[2 3 4]


In [7]:
# 2D array: each inner sequence becomes one row. A list of tuples and a
# list of lists produce the same array.

print np.array([(2,3,4), (5,6,7)])
print np.array([[2,3,4], [5,6,7]])

[[2 3 4]
 [5 6 7]]
[[2 3 4]
 [5 6 7]]


In [10]:
# "Empty" arrays: np.empty allocates the array WITHOUT initializing it, so
# the printed values are whatever happened to be in memory (see the
# arbitrary tiny numbers in the output). Do not rely on them being zero.

print np.empty(5) # 1D array of 5 uninitialized values
print np.empty((5,5)) # 2D (5x5) array; the shape is passed as a tuple

[  0.00000000e+000  -1.35058369e-315   2.15356540e-314   2.15357793e-314
   2.15357796e-314]
[[  0.00000000e+000   1.49166824e-154   2.12325858e-314   2.16245110e-314
    2.12581833e-314]
 [  2.16243177e-314   2.12326980e-314   2.16243200e-314   2.12581833e-314
    2.16243165e-314]
 [  2.12326980e-314   2.16243191e-314   2.12581833e-314   2.16243203e-314
    2.12326980e-314]
 [  2.16243141e-314   2.12581833e-314   2.16243163e-314   2.12326980e-314
    2.16243153e-314]
 [  2.12343360e-314   2.16245110e-314   2.12326980e-314   2.16243189e-314
    2.12343360e-314]]


In [13]:
# Array of ones
# (single-argument print calls are parenthesised so this cell prints the
# same thing under Python 2 and also runs under Python 3)

print(np.ones((3,4)))  # default data type is float

# Request integers with the builtin int: np.int was only an alias for it
# and has been removed from recent NumPy releases.
print(np.ones((3,4), dtype=int))

[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]
[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [14]:
# Array of zeros: the shape is passed as a tuple and, as the output
# shows, the elements are floats by default.

print np.zeros((3,2))

[[ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]]


In [16]:
# Random values drawn uniformly from the half-open interval [0.0, 1.0)

print np.random.random((5,4))  # shape is passed as a single tuple

print np.random.rand(5,4)  # rand takes the dimensions as separate arguments

[[ 0.4450037   0.80977716  0.39460639  0.04930945]
 [ 0.73568294  0.82136237  0.70595664  0.27659673]
 [ 0.56765983  0.70184593  0.31991073  0.24659167]
 [ 0.41405919  0.49505704  0.90205291  0.26180955]
 [ 0.91370921  0.92041774  0.53839341  0.25226466]]
[[ 0.58647172  0.50605311  0.68668773  0.77851397]
 [ 0.43266068  0.66881668  0.51630004  0.86802288]
 [ 0.99387208  0.58729772  0.74717489  0.42220703]
 [ 0.38567612  0.08020489  0.71560236  0.84438312]
 [ 0.09352509  0.71340497  0.84470398  0.75784184]]


In [19]:
# Sample numbers from a Gaussian (normal) distribution.
# Positional arguments are (mean, standard deviation); size is the shape.

print np.random.normal(size = (2,3)) # standard normal (mean 0, sd 1)

print np.random.normal(50,10, size = (2,3)) # mean 50, sd 10

[[-0.64316626 -1.31473578 -1.05694952]
 [-0.17201293  1.00575489  1.57400936]]
[[ 43.07153392  48.36602628  55.93987215]
 [ 57.42467779  63.45993376  55.34174127]]


In [4]:
# Random integers. The lower bound is inclusive and the upper bound is
# exclusive in every form below.

print np.random.randint(10) # a single integer in [0, 10)
print np.random.randint(0, 10) # a single integer in [low, high) = [0, 10)
print np.random.randint(0, 10, size = 5) # 5 random integers as a 1D array
print np.random.randint(0,10, size = (2,3)) # random integers in [0, 10) as a 2x3 array

1
3
[5 9 1 4 3]
[[3 7 2]
 [8 2 1]]


In [11]:
# Numpy array attributes

a = np.random.random((5,4))
# The array contents are random, so only its attributes are printed here.
print a.shape  # attribute shape gives dimensions of array as a tuple
print a.shape[0] # number of rows
print a.shape[1] # number of columns
print len(a.shape) # gives number of dimensions (same value as a.ndim)
print a.size # gives total number of elements (rows * columns)
print a.dtype # gives the data type of the elements

(5, 4)
5
4
2
20
float64


In [17]:
# Operations on arrays

# Seed the generator so the random array (and every number printed
# below) is the same on each run.
np.random.seed(693)
a = np.random.randint(0,10, size = (5, 4))
print a

# Sum of all elements
print 'Sum of all elements:', a.sum()

# Sum by axis. Think of this as which dimension
# is being collapsed, or which dimension you're
# iterating over: axis=0 collapses the rows (one
# result per column), axis=1 collapses the columns
# (one result per row).
print 'Sum of each column:', a.sum(axis=0)
print 'Sum of each row:', a.sum(axis=1)

# Stats: min of each column, max of each row, mean over all elements
print 'Min of columns:', a.min(axis=0)
print 'Max of rows:', a.max(axis=1)
print 'Mean of all elements:', a.mean()

[[2 0 5 1]
 [1 3 4 4]
 [9 2 9 1]
 [9 3 7 5]
 [4 7 0 3]]
Sum of all elements: 79
Sum of each column: [25 15 25 14]
Sum of each row: [ 8 12 21 24 14]
Min of columns: [1 0 0 1]
Max of rows: [5 4 9 9 7]
Mean of all elements: 3.95


In [19]:
# Locate the maximum value

def get_max_index(a):
    '''Return the index of the max value in a given 1D array.

    Accepts any 1D array-like (ndarray, list or tuple), not only objects
    that provide their own ``argmax`` method. If the maximum occurs more
    than once, the index of its first occurrence is returned. An empty
    input raises ValueError.
    '''
    # np.argmax converts plain sequences to an array first; for an ndarray
    # it gives the same result as a.argmax().
    return np.argmax(a)

a = np.array([9,6,2,3,12,14,7,10], dtype=np.int32)
print "Maximum value:", a.max()
print "Index of max:", get_max_index(a)

# Tougher for multi-dimensional arrays: there argmax() returns an index
# into the flattened array. np.unravel_index(flat_index, a.shape)
# converts that flat index back into a (row, column, ...) tuple.

Maximum value: 14
Index of max: 5


In [23]:
# Utilizing the time function

from time import time

def how_long(func, *args, **kwargs):
    '''Execute function with given arguments, and measure execution time.

    Positional and keyword arguments are forwarded to ``func`` unchanged.

    Returns a tuple ``(result, elapsed)``: ``result`` is whatever ``func``
    returned and ``elapsed`` is the run time in seconds. Exceptions raised
    by ``func`` propagate to the caller.
    '''
    # timeit.default_timer is the clock the timeit module itself uses for
    # benchmarking, which makes it a better fit for measuring short
    # intervals than the wall-clock time.time().
    from timeit import default_timer

    start = default_timer()
    result = func(*args, **kwargs)
    elapsed = default_timer() - start
    return result, elapsed
    
def manual_mean(arr):
    '''Compute mean of all elements in a 2D array using explicit loops.

    This is the deliberately slow, pure-Python counterpart of ``arr.mean()``.
    The result is always a float, including for integer arrays. An array
    with no elements yields nan, matching what ``arr.mean()`` returns.
    '''
    if arr.size == 0:
        # Nothing to average; avoid dividing by zero.
        return float('nan')

    # Float accumulator so the final division is a true division even for
    # integer arrays under Python 2 (and so the builtin sum is not shadowed).
    total = 0.0
    # The column index range is the same for every row, so build it once.
    col_indices = range(arr.shape[1])
    for i in range(arr.shape[0]):
        for j in col_indices:
            total += arr[i, j]
    return total / arr.size

def numpy_mean(arr):
    '''Compute mean of all elements by delegating to the array's own mean().'''
    average = arr.mean()
    return average

# Benchmark input: 1000 x 10000 = 10 million random floats
nd1 = np.random.random((1000, 10000))

# Time both implementations on the same array
res_manual, t_manual = how_long(manual_mean, nd1)
res_numpy, t_numpy = how_long(numpy_mean, nd1)
# Ratio of the two run times; this divides by t_numpy, so it would fail
# if that measurement ever came back as exactly zero.
speedup = t_manual / t_numpy

print 'Numpy is ', speedup, ' times faster than manual'

Numpy is  363.98703982  times faster than manual


In [36]:
# Accessing array elements

a = np.random.rand(5,4)

# Access element at position (3, 2), i.e. 4th row, 3rd column - remember 0 indexing!
print a[3,2]

# Elements in defined range (the end of a slice is exclusive)
print a[0, 1:3]  # first row, columns 1 and 2
print a[0:2, 0:2]  # first two rows, first two columns
print a[:, 0:3:2] # all rows, columns 0 up to (not including) 3 in steps of size 2
print a[:, [0,2]] # same columns, selected with an explicit list of indices

# assign values to specific elements

a[0,0] = 1 # replace one element

a[0, :] = 2 # replace the entire first row with a single value

a[:, 3] = [1, 2, 3, 4, 5] # replace the last column, one value per row

print a

0.828503186179
[ 0.94173764  0.6644467 ]
[[ 0.05444369  0.94173764]
 [ 0.47564131  0.82204461]]
[[ 0.05444369  0.6644467 ]
 [ 0.47564131  0.92555613]
 [ 0.43719997  0.69572763]
 [ 0.38208187  0.82850319]
 [ 0.79180428  0.56134387]]
[[ 0.05444369  0.6644467 ]
 [ 0.47564131  0.92555613]
 [ 0.43719997  0.69572763]
 [ 0.38208187  0.82850319]
 [ 0.79180428  0.56134387]]
[[ 2.          2.          2.          1.        ]
 [ 0.47564131  0.82204461  0.92555613  2.        ]
 [ 0.43719997  0.81071398  0.69572763  3.        ]
 [ 0.38208187  0.10815384  0.82850319  4.        ]
 [ 0.79180428  0.34305145  0.56134387  5.        ]]


In [38]:
# Indexing an array with another array

a = np.random.rand(5)
print a

# An index may appear more than once (index 1 is listed twice here)
indices = np.array([1,1,2,3])

# The result holds one element per entry in indices, in the same order
print a[indices]

[ 0.60263762  0.65670023  0.8299399   0.51057838  0.31339286]
[ 0.65670023  0.65670023  0.8299399   0.51057838]


In [44]:
# Boolean or Mask index arrays

a = np.array([(20, 25, 10, 23, 26, 32, 10, 5, 0),
              (0, 2, 50, 20, 0, 1, 28, 5, 0)])
print a

mean = a.mean()
print mean

# Masking: a < mean is a boolean array of the same shape as a; indexing
# with it returns the matching elements as a flat 1D array
print a[a < mean]

# Replacement: overwrite every element below the mean.
# NOTE: a is an integer array, so the float mean (14.27...) is truncated
# to 14 when it is stored - see the printed result.

a[a < mean] = mean
print a

[[20 25 10 23 26 32 10  5  0]
 [ 0  2 50 20  0  1 28  5  0]]
14.2777777778
[10 10  5  0  0  2  0  1  5  0]
[[20 25 14 23 26 32 14 14 14]
 [14 14 50 20 14 14 28 14 14]]


In [4]:
# Arithmetic operations (1.3.21)

# always applied element-wise

a = np.array([(1, 2, 3, 4, 5),
              (10, 20, 30, 40, 50)])

print a

print 2 * a  # the scalar multiplies every element

b = np.array([(100, 200, 300, 400, 500),
              (1, 2, 3, 4, 5)])

print a + b  # element-wise addition of two arrays of the same shape

print a * b # element-wise multiplication (NOT a matrix product)

[[ 1  2  3  4  5]
 [10 20 30 40 50]]
[[  2   4   6   8  10]
 [ 20  40  60  80 100]]
[[101 202 303 404 505]
 [ 11  22  33  44  55]]
[[ 100  400  900 1600 2500]
 [  10   40   90  160  250]]
