# Data Types and Structures

## Tuples

In [2]:
# Tuple
t = (1,2.5, 'data')
type(t)

tuple

In [3]:
t[0]

1

In [4]:
type(t[2])

str

In [5]:
t.count('data')

1

In [6]:
t.index(1)

0

## Lists

In [7]:
l = list(t)
l

[1, 2.5, 'data']

In [8]:
type(l)

list

In [9]:
l.append([4,3]) # appends this list as a new item in the list
l

[1, 2.5, 'data', [4, 3]]

In [10]:
l.extend([1,1.4,3]) # adds elements to the list
l

[1, 2.5, 'data', [4, 3], 1, 1.4, 3]

In [11]:
l.insert(1,'insert') # inserts the string 'insert' before the index position = 1
l

[1, 'insert', 2.5, 'data', [4, 3], 1, 1.4, 3]

In [12]:
l.remove('data')
l

[1, 'insert', 2.5, [4, 3], 1, 1.4, 3]

In [13]:
p = l.pop(3)
print(l,p)

[1, 'insert', 2.5, 1, 1.4, 3] [4, 3]


In [14]:
# Slicing
l[2:5]

[2.5, 1, 1.4]

## Control Structures

### Loops

In [15]:
for element in l[2:5]:
    print(element**2)

6.25
1
1.9599999999999997


In [16]:
l

[1, 'insert', 2.5, 1, 1.4, 3]

### List Comprehensions
List comprehensions are compact, loop-like expressions that build a list in a single line — a first step towards vectorized calculations.

In [17]:
m = [i**2 for i in range(5)]
m

[0, 1, 4, 9, 16]

## Functions

In [18]:
def f(x):
    """Return the square of ``x``."""
    return pow(x, 2)

def even(x):
    """Return True when ``x`` leaves no remainder on division by 2."""
    remainder = x % 2
    return remainder == 0

print("f(2) = %3.2f \neven(3) = %r" % (f(2), even(3)))

f(2) = 4.00 
even(3) = False


To apply a function to every element of a list, use `map`:

In [19]:
x = map(even, range(10))
list(x) #convert the iterable to a list

# OR
[even(val) for val in range(10)]

[True, False, True, False, True, False, True, False, True, False]

In [20]:
x = map(lambda x: x**2, range(10))
list(x)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

### Using functions to filter a list

In [21]:
x = filter(even, range(15))
list(x)

[0, 2, 4, 6, 8, 10, 12, 14]

### Reduce
Applies a function to all elements of a list and "reduces" it down to one value.

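In Python 3, `reduce` is no longer a built-in: it was moved to the `functools` module (`from functools import reduce`). For common reductions, prefer a dedicated built-in such as `sum`, or an explicit loop.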

In [22]:
sum(range(10))

45

## Dictionaries
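Key-value stores. Dictionaries preserve insertion order from Python 3.7 onward (earlier versions make no ordering guarantee). They have no `sort` method, but their keys or items can be passed to `sorted()`.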

In [23]:
d = {
    'Name' : 'Angela Merkel',
    'Country' : 'Germany',
    'Profession' : 'Chancelor',
    'Age' : 60
}

type(d)

dict

In [24]:
print(d['Name'], d['Age'])

Angela Merkel 60


In [25]:
d.keys()

dict_keys(['Age', 'Country', 'Name', 'Profession'])

In [26]:
d.values()

dict_values([60, 'Germany', 'Angela Merkel', 'Chancelor'])

In [27]:
d.items()

dict_items([('Age', 60), ('Country', 'Germany'), ('Name', 'Angela Merkel'), ('Profession', 'Chancelor')])

In [29]:
birthday = True
if birthday:
    # In-place increment: every execution of this cell adds another year, so the
    # cell is not idempotent. The recorded output (62, from an initial Age of 60)
    # suggests the cell was executed twice before the notebook was saved.
    d['Age'] += 1

print(d['Age'])

62


In [30]:
for item in d.items():
    print( item)

('Age', 62)
('Country', 'Germany')
('Name', 'Angela Merkel')
('Profession', 'Chancelor')


In [32]:
# Iterating over a dict directly yields its keys (use d.values() for the values),
# so every element reported here is a str.
for key in d:
    print(type(key))

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>


## Sets
Set objects are unordered collections of other (hashable) objects that contain every element only once:


In [33]:
# A set literal; the repeated 'd' and 'du' entries collapse to a single member each.
s = {'u', 'd', 'ud', 'du', 'd', 'du'}
s

{'d', 'du', 'u', 'ud'}

In [34]:
# Second set, written as a literal, for the set-algebra examples.
t = {'d', 'dd', 'uu', 'u'}

In [36]:
s.union(t) #all of s and t

{'d', 'dd', 'du', 'u', 'ud', 'uu'}

In [38]:
s.intersection(t) # in both s and t

{'d', 'u'}

In [39]:
s.difference(t) # in s but not t

{'du', 'ud'}

In [40]:
t.difference(s) # in t but not s

{'dd', 'uu'}

In [41]:
s.symmetric_difference(t) # in either one but not both

{'dd', 'du', 'ud', 'uu'}

### Good for getting rid of duplicates in lists!

In [42]:
from random import randint
# 1000 random integers drawn from 0..10; randint includes both endpoints.
l = [randint(0, 10) for _ in range(1000)]
len(l)

1000

In [43]:
l[:20]

[4, 9, 0, 1, 5, 10, 7, 4, 5, 5, 10, 9, 9, 5, 7, 2, 10, 6, 8, 6]

In [44]:
s = set(l)
s

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}

## NumPy Data Structures

### Arrays with Python Lists

In [1]:
v = [0.5,0.75,1.0,1.5,2.0] # vector of numbers

In [2]:
m = [v, v, v] # matrix of numbers 
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [3]:
m[1]

[0.5, 0.75, 1.0, 1.5, 2.0]

In [4]:
m[1][0]

0.5

In [5]:
v1 = [0.5,1.5]
v2 = [1,2]
m = [v1,v2]
c = [m,m] # cube of numbers
c

[[[0.5, 1.5], [1, 2]], [[0.5, 1.5], [1, 2]]]

In [6]:
c[1][1][0]

1

In [7]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]
m = [v,v,v]
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [8]:
v[0] = 'Python'
m

[['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0]]

In [9]:
from copy import deepcopy
v

In [10]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]
# Build each row from its own deepcopy of v. Multiplying a one-element list
# (3*[row]) would repeat the *same* row object three times, so editing one row
# would silently change all of them.
m = [deepcopy(v) for _ in range(3)]
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [11]:
v[0] = 'Python'
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

## Regular NumPy arrays

In [16]:
import numpy as np
a = np.array([0, 0.5, 1.0, 1.5, 2.0])
a[:2]

array([ 0. ,  0.5])

In [17]:
a.sum()

5.0

In [18]:
a.std()

0.70710678118654757

In [19]:
a.cumsum()

array([ 0. ,  0.5,  1.5,  3. ,  5. ])

### Vectorized operations

In [20]:
a*2

array([ 0.,  1.,  2.,  3.,  4.])

In [22]:
np.sqrt(a)

array([ 0.        ,  0.70710678,  1.        ,  1.22474487,  1.41421356])

In [23]:
b = np.array([a,a*2])
b

array([[ 0. ,  0.5,  1. ,  1.5,  2. ],
       [ 0. ,  1. ,  2. ,  3. ,  4. ]])

In [24]:
b[0]

array([ 0. ,  0.5,  1. ,  1.5,  2. ])

In [25]:
b.sum()

15.0

#### Axis calculations
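* `axis=0`: aggregates down the rows, giving one result per column (column-wise)
* `axis=1`: aggregates across the columns, giving one result per row (row-wise)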

In [26]:
b.sum(axis=0)

array([ 0. ,  1.5,  3. ,  4.5,  6. ])

### Initializing NumPy Arrays:
* np.zeros
* np.ones
* np.ones_like
* np.zeros_like

In [32]:
c = np.zeros(( 2, 3, 4), dtype ='i', order ='C') # also: np.ones() 
c

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=int32)

In [40]:
d = np.ones_like(c,dtype=np.dtype(float),order='C')
d

array([[[ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.]],

       [[ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.]]])

In [42]:
c = np.zeros((2,3,4),dtype=np.dtype(int),order = 'C')
c

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]])

In [49]:
d = np.ones_like(c,dtype = np.dtype(float), order = 'C')
d

array([[[ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.]],

       [[ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.],
        [ 1.,  1.,  1.,  1.]]])

In [50]:
import random
I = 5000

In [54]:
%time mat = np.random.standard_normal((I,I))

Wall time: 2.12 s


In [55]:
%time mat.sum()

Wall time: 31 ms


-1162.1907941751549

## Structured Arrays
Structured arrays allow us to have different NumPy data types per column.

In [59]:
# Record layout: 10-byte string name, 32-bit int age, single-precision float
# height, and a pair of 32-bit ints for children/pets.
dt = np.dtype([
    ('Name', 'S10'),
    ('Age', 'i4'),
    ('Height', 'f'),
    ('Children/ Pets', 'i4', 2),
])
s = np.array(
    [
        ('Smith', 45, 1.83, (0, 1)),
        ('Jones', 53, 1.72, (2, 2)),
    ],
    dtype=dt,
)
s

array([(b'Smith', 45, 1.8300000429153442, [0, 1]),
       (b'Jones', 53, 1.7200000286102295, [2, 2])], 
      dtype=[('Name', 'S10'), ('Age', '<i4'), ('Height', '<f4'), ('Children/ Pets', '<i4', (2,))])

In [60]:
s['Name']

array([b'Smith', b'Jones'], 
      dtype='|S10')

In [61]:
s['Height']

array([ 1.83000004,  1.72000003], dtype=float32)

In [62]:
s['Height'].mean()

1.7750001

## Vectorization of Code
Vectorization of code is a strategy to get more compact code that is possibly executed faster.

In [63]:
r = np.random.standard_normal(( 4, 3)) 
s = np.random.standard_normal(( 4, 3))
r + s

array([[ 0.8958207 , -0.14011231, -0.5048728 ],
       [ 0.51114013, -0.1385177 ,  1.47693907],
       [-0.62480836,  2.21775504,  1.86759311],
       [-0.017303  ,  0.42358837,  0.82865754]])

In [64]:
2*r+3

array([[ 5.9664759 , -0.3911128 ,  2.25230455],
       [ 3.6773676 ,  0.00908451,  6.93436166],
       [ 2.92931397,  4.87330805,  5.09924182],
       [ 3.7543721 ,  2.00453745,  1.21995403]])

In [65]:
s = np.random.standard_normal(3)

In [66]:
r + s

array([[ 0.78511706, -1.56285353,  0.06231746],
       [-0.35943709, -1.36275487,  2.40334602],
       [-0.7334639 ,  1.0693569 ,  1.4857861 ],
       [-0.32093484, -0.3650284 , -0.4538578 ]])