# Intro to NumPy

## Arrays

In [1]:
from numpy import *

In [6]:
# Build a 1-D integer array and show its basic attributes as one tuple.
a = array([0, 1, 2, 3])
(
    a,           # the array itself
    type(a),     # type of a
    a.dtype,     # type of the elements in the array
    a.itemsize,  # bytes per element
    a.shape,     # length along each dimension; shape(a) is equivalent
    a.size,      # number of elements; size(a) is equivalent
    a.ndim,      # number of dimensions, same result as len(a.shape)
)

(array([0, 1, 2, 3]), numpy.ndarray, dtype('int64'), 8, (4,), 4, 1)

In [8]:
a[0] = 100 # element assignment behaves like a list
# slicing with a step uses the list syntax too, e.g. a[::-1] walks the array backwards
# (a[3:10:-1] would be empty, because a negative step needs start > stop)
a

array([100,   1,   2,   3])

In [9]:
a.fill(200) # overwrite every element of the array, in place, with 200
a

array([200, 200, 200, 200])

In [12]:
g = zeros(5) # zeros() produces floating point elements by default
g

array([ 0.,  0.,  0.,  0.,  0.])

In [17]:
g = g.astype('int') # astype returns a converted array; rebind g to the integer version
g

array([0, 0, 0, 0, 0])

## Multi-Dimensional Array

In [5]:
arange(5) # integers from 0 up to, but not including, 5

array([0, 1, 2, 3, 4])

In [7]:
r = arange(6).reshape(2,3) # reshape returns a new array object (2 rows, 3 columns); it is a view of the same data when possible
r # indexing and slicing still behave like a python nested list

array([[0, 1, 2],
       [3, 4, 5]])

In [8]:
r[0,1], r[1,1:] # indexing produces a single value, slicing produces another array

(1, array([4, 5]))

In [9]:
r[:,2] = 100 # all rows, third column (index 2)
r

array([[  0,   1, 100],
       [  3,   4, 100]])

In [10]:
arange(24).reshape(3,2,4) # 3 dimensional array
# index convention for reshape(x,y,z):
# the last index (-1), z in this case, always represents the number of columns
# NumPy arrays are 'row major' by default: the data of a row is stored side by side in memory,
# so processing a row is faster, hence adding data to a row is faster.
# the columns typically represent the time axis

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]],

       [[16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [11]:
# 4 dimensional array holding the integers 0..47
a = arange(48).reshape((2, 3, 2, 4))
# fix the second axis at index 0 and the last axis at its final index;
# the two fully sliced axes that remain give a 2x2 array
a[:, 0, :, -1]

array([[ 3,  7],
       [27, 31]])

In [16]:
b = a[:,:,:,::2] # striding/stepping: every second value along the last axis
c = b.copy()
b.fill(100) # b references a slice of a, so this also changes a
c.fill(200) # c is a distinct copy of the values from a, so a is not affected
a

array([[[[100,   1, 100,   3],
         [100,   5, 100,   7]],

        [[100,   9, 100,  11],
         [100,  13, 100,  15]],

        [[100,  17, 100,  19],
         [100,  21, 100,  23]]],


       [[[100,  25, 100,  27],
         [100,  29, 100,  31]],

        [[100,  33, 100,  35],
         [100,  37, 100,  39]],

        [[100,  41, 100,  43],
         [100,  45, 100,  47]]]])

In [17]:
# growing an array is computationally expensive
# it is more efficient to preallocate the space in an array
# and load the data into the array gradually

## Fancy Indexing

In [18]:
a = arange(30).reshape(5,6) # integers 0..29 laid out as 5 rows by 6 columns
a

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

In [19]:
mask = (a % 3) == 0 # boolean array, True where the element is a multiple of 3
mask

array([[ True, False, False,  True, False, False],
       [ True, False, False,  True, False, False],
       [ True, False, False,  True, False, False],
       [ True, False, False,  True, False, False],
       [ True, False, False,  True, False, False]], dtype=bool)

In [20]:
a[mask] # boolean indexing: 1-D array of the elements where mask is True

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27])

In [21]:
mask2 = where(a % 3 == 0)
# with only a condition, where returns one index array per dimension
# (row indices, column indices) for the positions where the condition is true
# the full form is where(condition, value_if_true, value_if_false)
mask2

(array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 3, 0, 3, 0, 3, 0, 3, 0, 3]))

In [22]:
a[mask2] # indexing with the (row, column) index arrays picks out those elements as a 1-D array

array([ 0,  3,  6,  9, 12, 15, 18, 21, 24, 27])

In [29]:
a = arange(6).reshape(2,3) # small 2x3 example array
a

array([[0, 1, 2],
       [3, 4, 5]])

In [23]:
a.sum(), a.prod(), a.mean(), a.std(), a.var(), a.min(), a.max()
# without an axis argument each reduction runs over all elements and gives a single value

(15, 0, 2.5, 1.707825127659933, 2.9166666666666665, 0, 5)

In [24]:
a[-1,-1] = -9 # make the last element the smallest one
a.argmin() #position of the min value element in the flattened array (array dimensions are disregarded)

5

In [26]:
unravel_index(a.argmin(), a.shape)
# converts the flat position into array coordinates (row, column) based on the shape

(1, 2)

In [30]:
# Rebuild the example array first: an earlier cell set a[-1,-1] = -9, while the
# sums in this cell are meant for the unmodified values 0..5. Without this the
# result depends on the order in which the cells were executed.
a = arange(6).reshape(2,3)
a.sum(axis=0), a.sum(axis=1)
# axis=0 sums in the row direction, i.e. one total per column
# axis=1 sums in the column direction, i.e. one total per row
# axis=-1 is the last index, by convention the columns

(array([3, 5, 7]), array([ 3, 12]))

In [14]:
# Four ways of getting at the elements of a 2-D array in flat order.
a = array([[0, 1], [2, 3]])
(
    a.flatten(),  # a separate copy of a
    a.flat,       # an iterator object that references a
    a.flat[:],    # slicing the iterator gives a 1-D array
    a.ravel(),    # a reference to a (shares its data when possible)
)

(array([0, 1, 2, 3]),
 <numpy.flatiter at 0x324dcd0>,
 array([0, 1, 2, 3]),
 array([0, 1, 2, 3]))

In [17]:
a = arange(6).reshape(2,3)
aa = a.transpose() # this references a. 'a.T' is equivalent to transpose
# the data in memory is not changed, only the strides of the array are swapped
aa, a.strides, aa.strides

(array([[0, 3],
        [1, 4],
        [2, 5]]), (24, 8), (8, 24))

In [19]:
a = array([[11,21,31],
           [12,22,32],
           [13,23,33]])
# main diagonal, the diagonal one step above it (offset=1) and one step below it (offset=-1)
a.diagonal(), a.diagonal(offset=1), a.diagonal(offset=-1)

(array([11, 22, 33]), array([21, 32]), array([12, 23]))

In [20]:
i = [0,1,2]
a[i,i] # the index lists are paired element-wise: a[0,0], a[1,1], a[2,2], i.e. the main diagonal

array([11, 22, 33])

In [21]:
a[i,i] = 2 # set the main diagonal
i2 = array([0,1])
a[i2,i2+1] = 1 # set the diagonal above the main one: a[0,1], a[1,2]
a[i2+1,i2] = -1 # set the diagonal below the main one: a[1,0], a[2,1]
a

array([[ 2,  1, 31],
       [-1,  2,  1],
       [13, -1,  2]])

## Complex Numbers

In [22]:
a = array([1+1j, 2, 3, 4]) # one complex element makes the whole array complex
a, a.dtype

(array([ 1.+1.j,  2.+0.j,  3.+0.j,  4.+0.j]), dtype('complex128'))

In [23]:
a.real, a.imag # real and imaginary parts as separate float arrays

(array([ 1.,  2.,  3.,  4.]), array([ 1.,  0.,  0.,  0.]))

In [24]:
a.imag = (1,2,3,4) # the imaginary parts can be assigned in place
a

array([ 1.+1.j,  2.+2.j,  3.+3.j,  4.+4.j])

In [28]:
a.conj() # complex conjugate: the sign of every imaginary part is flipped

array([ 1.-1.j,  2.-2.j,  3.-3.j,  4.-4.j])

## Type Casting

In [29]:
a = array([1.5, -3], dtype=float32) # request 32-bit floats explicitly
a

array([ 1.5, -3. ], dtype=float32)

In [30]:
asarray(a, dtype=float64), asarray(a, dtype=uint8)
# upcast, downcast: asarray makes a copy when the type changes,
# but returns the original array if there is no type change
# NOTE: the downcast drops the fraction (1.5 -> 1) and -3 does not fit in uint8
# (it shows up as 253 here), so downcasting can silently change values

(array([ 1.5, -3. ]), array([  1, 253], dtype=uint8))

In [31]:
a.astype(float32), a.astype(uint8)
# same type (a is already float32), downcast: astype makes a copy in both cases

(array([ 1.5, -3. ], dtype=float32), array([  1, 253], dtype=uint8))

## Calculations and Statistics

In [33]:
a=array([[1,2,3],
        [4,5,6]])
# method form and function form give the same result; sum here is numpy's sum
# (it came in with 'from numpy import *' and replaces the builtin sum)
a.sum(axis=0), sum(a, axis=0)

(array([5, 7, 9]), array([5, 7, 9]))

In [38]:
a.min(axis=0), amin(a, axis=0), a.argmin(axis=0) #argmin returns the index of the minimum
# amin is a more efficient min function, especially for multi-dim arrays
# the same holds for max

(array([1, 2, 3]), array([1, 2, 3]), array([0, 0, 0]))

In [39]:
a.mean(axis=0), average(a, axis=0) # without weights, average gives the same result as mean

(array([ 2.5,  3.5,  4.5]), array([ 2.5,  3.5,  4.5]))

In [40]:
average(a, weights=[1,2], axis=0) #weighted average: row 0 has weight 1, row 1 has weight 2

array([ 3.,  4.,  5.])

In [41]:
a.std(axis=0), a.var(axis=0), a.std(), a.var()
# standard deviation and variance per column (axis=0), then over all elements

(array([ 1.5,  1.5,  1.5]),
 array([ 2.25,  2.25,  2.25]),
 1.707825127659933,
 2.9166666666666665)

In [42]:
a=array([[1,2,3],
        [4,5,6]])
# clip limits the values to a range and returns the result as a new array
# values <3 are set to 3
# values >5 are set to 5
a.clip(3,5)

array([[3, 3, 3],
       [4, 5, 5]])

In [44]:
# calculate max - min (a.k.a Peak-to-Peak), per column and over all elements
# the function form ptp(a, ...) is used because the ndarray.ptp method
# was removed in NumPy 2.0; the function works on old and new versions alike
ptp(a, axis=0), ptp(a)

(array([3, 3, 3]), 5)

In [45]:
# rounding values
a = array([1.35, 2.5, 1.5])
a.round(), a.round(decimals=1)

(array([ 1.,  2.,  2.]), array([ 1.4,  2.5,  1.5]))

In [48]:
# Identity Matrix
a = identity(4, dtype=int)
a

array([[1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1]])

In [50]:
# empty array
a = empty(2) # empty(shape, dtype=xx, order='C' or 'F')
#'C' stores multi-dim data in row-major/C-style
#'F' stores multi-dim data in col-major/Fortran-style
a

array([ 0.,  0.])

In [52]:
# linspace: `num` values spaced linearly from start to end, both ends included
a = linspace(0, 1, num=5)
# logspace: `num` values spaced evenly on a log scale,
# running from base**start to base**end (base defaults to 10)
b = logspace(0, 1, num=5)
a, b

(array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ]),
 array([  1.        ,   1.77827941,   3.16227766,   5.62341325,  10.        ]))

In [53]:
a.shape # a is 1-D with 5 elements

(5,)

In [55]:
#increasing dimension using newaxis 
b = a[:,newaxis]
c = a[newaxis,:]
b.shape, c.shape

((5, 1), (1, 5))

In [57]:
# creating mesh grid
x,y = mgrid[0:3,0:3]
x

array([[0, 0, 0],
       [1, 1, 1],
       [2, 2, 2]])

In [59]:
y # second array of the mesh grid: the values change across the columns

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]])

In [60]:
x + y # element-wise sum of the two full grids

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [61]:
# creating open grid
x, y = ogrid[0:3, 0:3]
x

array([[0],
       [1],
       [2]])

In [62]:
y # a single row, shape (1, 3)

array([[0, 1, 2]])

In [63]:
x + y # open grid arrays are shaped so that math ops broadcast to the full grid

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

## Structured Arrays

In [65]:
# arrays can be customised data structures
fmt = dtype([('name','S10'),
             ('age',int),
             ('weight',float)])
# now each element/cell contains 'name', 'age', 'weight' 
a = empty((2,3), dtype=fmt)
a

array([[('(\x0coU\x86\x7f\x00\x00@\x97', -8365436307840695515,  -8.76724156e-193),
        ('\xdd*\x86\x7f\x00\x00p\xe1U\x02',   168288329169633280,   2.46417145e+081),
        ('\x86\x7f\x00\x00pb=V\x86\x7f',  9189084870130794496,   1.94758448e+306)],
       [('',                    0,   0.00000000e+000),
        ('Q\x00\x00\x00\x00\x00\x00\x00x\x0b',  5476377149022033263,   3.81418679e-321),
        ('',                    0,   0.00000000e+000)]], 
      dtype=[('name', 'S10'), ('age', '<i8'), ('weight', '<f8')])

In [68]:
# populating the matrix using data type
# populating the matrix one field at a time, addressed by field name
# (lines inside brackets continue automatically, so no backslash is needed)
a['name'] = [['Brad', 'Jane', 'John'],
             ['Henry', 'George', 'Brian']]

a['age'] = [[33, 25, 47],
            [29, 61, 32]]

a['weight'] = [[135, 105, 225],
               [154, 202, 137]]

a

array([[('Brad', 33,  135.), ('Jane', 25,  105.), ('John', 47,  225.)],
       [('Henry', 29,  154.), ('George', 61,  202.), ('Brian', 32,  137.)]], 
      dtype=[('name', 'S10'), ('age', '<i8'), ('weight', '<f8')])