In [2]:
import numpy as np

## Dictionaries

A dictionary consists of a collection of key-value pairs. Each key-value pair maps the key to its associated value.
You can define a dictionary by enclosing a comma-separated list of key-value pairs in curly braces ({}). A colon (:) separates each key from its associated value.

d = {\
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;    key: value,\
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;    key2: value2,\
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;      .\
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;      .\
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;      .\
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;    keyN: valueN \
}


In [1]:
# A small example dictionary: every key is a string, while the values have
# different types (str, int and a list).
car = {
  "brand": "Porsche",
  "model": "Boxter",
  "year": 2015,
  "price_than_and_now": [20000,15000]  # a value can itself be a container
}


In [2]:
car

{'brand': 'Porsche',
 'model': 'Boxter',
 'year': 2015,
 'price_than_and_now': [20000, 15000]}

#### Accessing dict values
A value is retrieved from a dictionary by specifying its corresponding key in square brackets

In [4]:
print(car['brand'])
print(car['price_than_and_now'])

Porsche
[20000, 15000]


If you refer to a key that is not in the dictionary, Python raises an exception (a `KeyError`)

In [5]:
# car has no 'length' key at this point, so the lookup raises KeyError.
# The error is caught and printed so that the demonstration does not stop
# a "Restart & Run All" of the notebook.
try:
    car['length']
except KeyError as err:
    print("{}: {}".format(type(err).__name__, err))

KeyError: 'length'

#### Adding new keys
Adding an entry to an existing dictionary is simply a matter of assigning a new key and value

In [6]:
car['length'] = 350.5

In [7]:
car['length']

350.5

If you want to update an entry, you can just assign a new value to an existing key

In [8]:
car['year'] = 2018

In [9]:
print(car['year'])

2018


**Note**: Although access to items in a dictionary does not depend on order, Python (version 3.7 and later) does guarantee that the order of items in a dictionary is preserved. When displayed, items will appear in the order they were defined, and iteration through the keys will occur in that order as well. Items added to a dictionary are added at the end. If items are deleted, the order of the remaining items is retained.

#### Deleting keys

In [18]:
del car['length']

In [19]:
# Looking up a key that is no longer in the dict raises KeyError.
# The error is caught and printed so that the demonstration does not stop
# a "Restart & Run All" of the notebook.
try:
    car['length']
except KeyError as err:
    print("{}: {}".format(type(err).__name__, err))

KeyError: 'length'

#### What can be a dict key?

In [11]:
# keys do not have to be strings: an int, a float, a bool and a tuple all work
mydict = {
    42: 'aaa',
    2.78: 'bbb',
    True: 'ccc',
    (3, 4): 'ddd',
}
# fetch every value back through its key, one value per output line
print(mydict[42], mydict[2.78], mydict[True], mydict[(3, 4)], sep='\n')

aaa
bbb
ccc
ddd


Keys must be **hashable**, which in practice means **immutable**:\
a tuple (of immutable items) can be used as a key, while a list cannot

In [13]:
mydict[("Yoni",5)] = 'eee'
print(mydict[("Yoni",5)])

eee


In [14]:
# A list is mutable and therefore unhashable, so it is rejected as a key.
# The TypeError is caught and printed so that the demonstration does not stop
# a "Restart & Run All" of the notebook.
try:
    mydict[[1,2]] = 'fff'
except TypeError as err:
    print("{}: {}".format(type(err).__name__, err))

TypeError: unhashable type: 'list'

#### Built-in methods and operators

In [16]:
# check if key exists: the `in` operator tests against the dict's keys
# and evaluates to True or False
print('model' in car)
print('year' in car)
print('width' in car)

True
True
False


In [17]:
# len function for checking how many keys are there
print(len(car))

5


In [20]:
# clear() method - removes every item from the dict in place
mydict.clear()
# the dict itself still exists, it is just empty now
mydict

{}

In [21]:
# get() method - getting a value from the dict based on key, WILL NOT RAISE ERROR IF KEY DOES NOT EXIST

In [22]:
print(car.get('model'))
print(car.get('length'))

Boxter
None


In [24]:
# items() method - returns all pairs of key:values
list(car.items())

[('brand', 'Porsche'),
 ('model', 'Boxter'),
 ('year', 2018),
 ('price_than_and_now', [20000, 15000])]

In [26]:
# keys() method - returns all keys
# NOTE - returned object is not a list but a "dict_keys" object, if you want to index it you need to convert to list
kys = car.keys()
print(kys)
# indexing the dict_keys object directly raises TypeError; the error is caught
# and printed so that the demonstration does not stop a "Restart & Run All"
try:
    print(kys[0])
except TypeError as err:
    print("{}: {}".format(type(err).__name__, err))

dict_keys(['brand', 'model', 'year', 'price_than_and_now'])


TypeError: 'dict_keys' object is not subscriptable

In [28]:
kyslist = list(car.keys())
print(kyslist)
print(kyslist[0])

['brand', 'model', 'year', 'price_than_and_now']
brand


In [30]:
# values() method returns all values
# same as keys(), it returns a dict_values object which can't be indexed
vals = car.values()
print(vals)
# indexing the dict_values object directly raises TypeError; the error is caught
# and printed so that the demonstration does not stop a "Restart & Run All"
try:
    print(vals[0])
except TypeError as err:
    print("{}: {}".format(type(err).__name__, err))


dict_values(['Porsche', 'Boxter', 2018, [20000, 15000]])


TypeError: 'dict_values' object is not subscriptable

In [31]:
valslist = list(car.values())
print(valslist)
print(valslist[0])

['Porsche', 'Boxter', 2018, [20000, 15000]]
Porsche


### Iterating over dictionary

In [33]:
# iterating over keys. 
# if we just use the "for x in mydict" x will be assigned the keys of mydict (or iterate over those keys)

car

{'brand': 'Porsche',
 'model': 'Boxter',
 'year': 2018,
 'price_than_and_now': [20000, 15000]}

In [38]:
for key in car:
    print("Current key is {}".format(key))

Current key is brand
Current key is model
Current key is year
Current key is price_than_and_now


In [37]:
# iterating over values
for value in car.values():
    print("Current value is {}".format(value))

Current value is Porsche
Current value is Boxter
Current value is 2018
Current value is [20000, 15000]


In [39]:
# iterating over both keys and values
# in order to do that we can "unpack" what is returned from the iteration
for key,value in car.items():
    print("Current key is {} and its value is {}".format(key,value))

Current key is brand and its value is Porsche
Current key is model and its value is Boxter
Current key is year and its value is 2018
Current key is price_than_and_now and its value is [20000, 15000]


In [50]:
# let's create a more "data-sciency" dict
# each key is a feature name and each value is a list with one entry per sample
# (12 samples); three features are numerical and 'species' holds string labels
iris_dataset = {'sepal length (cm)': [5.1, 4.9, 4.7, 4.6, 5.0, 4.4, 3.9, 5.2, 4.2, 4.2, 4.0, 5.5],
                'sepal width (cm)': [3.5, 3.0, 3.2, 3.1, 3.6, 3.3, 3.4, 2.9, 4.1, 3.9, 3.1, 3.3],
                'petal length (cm)': [1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.8, 2.0, 1.9, 1.1, 1.2, 0.9],
                'species': ['virginica', 'setosa', 'versicolor', 'virginica', 'versicolor', 'setosa', 'virginica', 
                            'setosa', 'versicolor', 'virginica', 'versicolor', 'setosa']
               }
# a bare name on the last line displays the whole dict as the cell output
iris_dataset

{'sepal length (cm)': [5.1,
  4.9,
  4.7,
  4.6,
  5.0,
  4.4,
  3.9,
  5.2,
  4.2,
  4.2,
  4.0,
  5.5],
 'sepal width (cm)': [3.5,
  3.0,
  3.2,
  3.1,
  3.6,
  3.3,
  3.4,
  2.9,
  4.1,
  3.9,
  3.1,
  3.3],
 'petal length (cm)': [1.4,
  1.4,
  1.3,
  1.5,
  1.4,
  1.7,
  1.8,
  2.0,
  1.9,
  1.1,
  1.2,
  0.9],
 'species': ['virginica',
  'setosa',
  'versicolor',
  'virginica',
  'versicolor',
  'setosa',
  'virginica',
  'setosa',
  'versicolor',
  'virginica',
  'versicolor',
  'setosa']}

In [None]:
# we want a new dictionary with the normalized values, per feature
# reminder  ---> z = (x - u)/sigma , we end up with a feature with 0 mean and std of 1
# various statistical tests make use of the standard score (most notably the t-test)
# it is also very useful for many machine learning algorithms as well as artificial neural networks

In [56]:
# Build z_iris_dataset: same keys as iris_dataset, with every numerical feature
# replaced by its z-scores and every non-numerical feature copied unchanged.
# we create a new empty dict
z_iris_dataset = {}
# we iterate over key:value pairs
for key,val in iris_dataset.items():
    print("Feature is {}:".format(key))
    # transform list into a numpy array - it's just easier
    npval = np.array(val)
    # only numerical features can be normalized; anything else (here the species
    # labels) is recognised by its dtype rather than by a hard-coded key name
    # and is assigned as is to the new dictionary
    if not np.issubdtype(npval.dtype, np.number):
        z_iris_dataset[key] = val
        continue
    # calculate the feature std and mean
    valstd = np.std(npval)
    valmean = np.mean(npval)
    # transform into z-values
    if valstd == 0:
        # constant feature: every value equals the mean, so all z-values are 0
        # (dividing by a zero std would produce nan and a RuntimeWarning)
        ztrans = np.zeros(npval.shape)
    else:
        ztrans = (npval - valmean)/valstd
    print("The mean of the feature after z-norm is {}".format(np.round(np.mean(ztrans),1)))
    print("The std of the feature after z-norm is {}".format(np.std(ztrans)))
    print('\n')
    # assign into new dictionary. since it has no keys,
    # each iteration adds the key that is the feature name (sepal length (cm) etc.)
    z_iris_dataset[key] = ztrans

Feature is sepal length (cm):
The mean of the feature after z-norm is 0.0
The std of the feature after z-norm is 1.0


Feature is sepal width (cm):
The mean of the feature after z-norm is -0.0
The std of the feature after z-norm is 1.0


Feature is petal length (cm):
The mean of the feature after z-norm is 0.0
The std of the feature after z-norm is 1.0


Feature is species:


In [57]:
z_iris_dataset

{'sepal length (cm)': array([ 0.93733358,  0.52831529,  0.119297  , -0.08521214,  0.73282444,
        -0.49423044, -1.51677616,  1.14184273, -0.90324873, -0.90324873,
        -1.31226702,  1.75537017]),
 'sepal width (cm)': array([ 0.3866946 , -1.06341014, -0.48336824, -0.77338919,  0.67671554,
        -0.1933473 ,  0.09667365, -1.35343108,  2.12682028,  1.54677838,
        -0.77338919, -0.1933473 ]),
 'petal length (cm)': array([-0.21023533, -0.21023533, -0.52558833,  0.10511767, -0.21023533,
         0.73582366,  1.05117666,  1.68188266,  1.36652966, -1.15629433,
        -0.84094133, -1.78700033]),
 'species': ['virginica',
  'setosa',
  'versicolor',
  'virginica',
  'versicolor',
  'setosa',
  'virginica',
  'setosa',
  'versicolor',
  'virginica',
  'versicolor',
  'setosa']}

# Numpy

Perhaps the most important library in Python, upon which many other libraries are built (pandas, scikit-learn).
For numerical work its arrays are faster and easier to use than lists.
Basic "building block" - the numpy array

In [58]:
import numpy as np

In [61]:
# np.array builds an array from a sequence - here once from a list, once from a tuple
myfirstnp = np.array([1, 2, 3, 4])
mysecondnp = np.array((4, 5, 6, 7))
print(myfirstnp, mysecondnp, sep='\n')

[1 2 3 4]
[4 5 6 7]


Unlike lists, all elements of a numpy array share a single data type (dtype) - most commonly int, float or bool

In [62]:
a = np.array([4.5,2.7,100.74325])
a

array([  4.5    ,   2.7    , 100.74325])

In [63]:
a = np.array([True,False,True])

In [64]:
a

array([ True, False,  True])

#### Useful basic numpy functions to create arrays (a very partial list)

In [66]:
# arange - the numpy counterpart of range(): same arguments, but the result
# is a numpy array instead of a range object
a = np.arange(5)        # stop only
print(a)
a = np.arange(1, 6)     # start and stop (stop is excluded)
print(a)
a = np.arange(3, 9, 2)  # start, stop and step
print(a)

[0 1 2 3 4]
[1 2 3 4 5]
[3 5 7]


In [69]:
# linspace - give the first value, the last value and how many elements you want;
# numpy works out the even spacing itself
a = np.linspace(0, 10, num=20)
print(a)

[ 0.          0.52631579  1.05263158  1.57894737  2.10526316  2.63157895
  3.15789474  3.68421053  4.21052632  4.73684211  5.26315789  5.78947368
  6.31578947  6.84210526  7.36842105  7.89473684  8.42105263  8.94736842
  9.47368421 10.        ]


In [74]:
# zeros / ones - build an array of the requested shape filled with 0. / 1.
### a 1D shape can be given as a plain number; for more than one dimension
### the shape argument is a tuple with (rows, col, channels..) structure
a = np.zeros(5)
print(a, end='\n\n\n')
a = np.ones(5)
print(a, end='\n\n\n')
a = np.zeros((2, 2))
print(a, end='\n\n\n')
a = np.ones((2, 2))
print(a)

[0. 0. 0. 0. 0.]


[1. 1. 1. 1. 1.]


[[0. 0.]
 [0. 0.]]


[[1. 1.]
 [1. 1.]]


#### Array attributes

In [75]:
# shape - a tuple with the size along every axis (rows, cols, channels, ...)
a = np.ones((5, 5))
print(a, end='\n\n\n')
shape_of = a.shape
print(shape_of)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


(5, 5)


In [76]:
# the same attribute on a 3D array: one entry per axis
a = np.ones((5, 3, 5))
shape_of = a.shape
print(shape_of)

(5, 3, 5)


In [77]:
# ndim - number of dimensions
print(a.ndim)

3


#### Operations and Broadcasting
we can add, subtract, multiply etc. element-wise in numpy

we can also broadcast - use arrays of different shapes when doing arithmetic between arrays

In [87]:
# two 1D integer arrays of equal length, used as operands in the cells below
a = np.array([1, 2, 3, 4, 5])
c = np.array([4, 5, 6, 7, 8])

In [90]:
# array <op> scalar: the scalar is applied to every element of a
b = a+ 10
print(b)
print('\n')
b = a * 2
print(b)
print('\n')
# true division of the integer array gives a float array
b = a / 2
print(b)
print('\n')
# power is element-wise as well
b = a**2
print(b)
print('\n')

[11 12 13 14 15]


[ 2  4  6  8 10]


[0.5 1.  1.5 2.  2.5]


[ 1  4  9 16 25]




In [89]:
# array <op> array: elements are paired up by position
b = a+c
print(b)
print('\n')
b = a-c
print(b)
print('\n')
# note: * is element-wise multiplication
b= a*c
print(b)
print('\n')
# each element of a is raised to the power of the matching element of c
b = a**c
print(b)
print('\n')

[ 5  7  9 11 13]


[-3 -3 -3 -3 -3]


[ 4 10 18 28 40]


[     1     32    729  16384 390625]




In [22]:
# element-wise arithmetic is not limited to 1D - two 5x5 matrices behave the same way
a = np.ones((5, 5))
b = 3 * np.ones((5, 5))

print("a mat:")
print(a, end='\n\n\n')
print("b mat:")
print(b, end='\n\n\n')

# entries are added position by position
z = a + b
print("z mat:")
print(z)

a mat:
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


b mat:
[[3. 3. 3. 3. 3.]
 [3. 3. 3. 3. 3.]
 [3. 3. 3. 3. 3.]
 [3. 3. 3. 3. 3.]
 [3. 3. 3. 3. 3.]]


z mat:
[[4. 4. 4. 4. 4.]
 [4. 4. 4. 4. 4.]
 [4. 4. 4. 4. 4.]
 [4. 4. 4. 4. 4.]
 [4. 4. 4. 4. 4.]]


In [27]:
# for broadcasting smaller arrays into larger ones
# a remains the same
print("a mat:")
print(a)
print('\n')

# b is a single row with 5 columns, i.e. shape (1, 5)
b = 4* np.ones((1,5))
print("b vec:")
print(b)
print('\n')

# the single row of b is added to every row of a
z = a + b
print("z mat:")
print(z)

a mat:
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


b vec:
[[4. 4. 4. 4. 4.]]


z mat:
[[5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5.]
 [5. 5. 5. 5. 5.]]


#### Array methods

In [93]:
a = np.array([1,2,3,4,5,6,7,8,9])

In [94]:
a.shape

(9,)

In [96]:
# reshape - transform a to be of different shape, must be compatible with the previous shape
# (the total number of elements has to stay the same: 9 == 3*3)
a = a.reshape((3,3))
print (a)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [97]:
# The 9 elements of a cannot be arranged as 2 x 4 = 8, so reshape raises ValueError.
# The error is caught and printed so that the demonstration does not stop
# a "Restart & Run All" of the notebook; a itself is left unchanged.
try:
    a = a.reshape((2,4))
except ValueError as err:
    print("{}: {}".format(type(err).__name__, err))

ValueError: cannot reshape array of size 9 into shape (2,4)

In [98]:
# flatten - collapses the array into a single dimension, row after row
a = np.arange(8).reshape((2, 4))
print(a, end='\n\n\n')
b = a.flatten()
print(b)

[[0 1 2 3]
 [4 5 6 7]]


[0 1 2 3 4 5 6 7]


In [100]:
# transpose operation
print(a)
print('\n')
b = a.T
print(b)

[[0 1 2 3]
 [4 5 6 7]]


[[0 4]
 [1 5]
 [2 6]
 [3 7]]


In [3]:
# joining two or more arrays
# concatenate - joins the arrays end to end along an existing axis (the first one by default);
# the arrays are passed together as one tuple and must match in all other dimensions
a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
b = -2 * a
c = np.concatenate((a, b))
print(c)

[  1   2   3   4   5   6   7   8   9  -2  -4  -6  -8 -10 -12 -14 -16 -18]


In [4]:
a = np.array([1,2,3,4,5,6,7,8,9])
b = a * -2
c = b -5
d = np.concatenate((a,b,c))
print(d)

[  1   2   3   4   5   6   7   8   9  -2  -4  -6  -8 -10 -12 -14 -16 -18
  -7  -9 -11 -13 -15 -17 -19 -21 -23]


In [10]:
# hstack, vstack - vertically or horizontally stack arrays, arrays need to be packed in a tuple

# vstack - each 1D array becomes one row of a 2D result
d = np.vstack((a,c))
print(d)
print('\n')
# hstack - the 1D arrays are joined end to end into one longer 1D array
d = np.hstack((a,c))
print(d)

[[  1   2   3   4   5   6   7   8   9]
 [ -7  -9 -11 -13 -15 -17 -19 -21 -23]]


[  1   2   3   4   5   6   7   8   9  -7  -9 -11 -13 -15 -17 -19 -21 -23]


In [14]:
# the same stacking functions applied to two 2x5 matrices
a = np.ones((2, 5))
print("a matrix:")
print(a, end='\n\n\n')
b = 3 * np.ones((2, 5))
print("b matrix:")
print(b, end='\n\n\n')
# vstack - the rows of b are placed below the rows of a
d = np.vstack((a, b))
print("Vertical stack:")
print(d, end='\n\n\n')
# hstack - the columns of b are placed to the right of the columns of a
print("Horizontal stack")
d = np.hstack((a, b))
print(d)

a matrix:
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


b matrix:
[[3. 3. 3. 3. 3.]
 [3. 3. 3. 3. 3.]]


Vertical stack:
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [3. 3. 3. 3. 3.]
 [3. 3. 3. 3. 3.]]


Horizontal stack
[[1. 1. 1. 1. 1. 3. 3. 3. 3. 3.]
 [1. 1. 1. 1. 1. 3. 3. 3. 3. 3.]]


#### Slicing and indexing
Unlike lists, we can index and slice across both rows and columns (and higher dimensions)

In [30]:
# a 5x5 matrix of random integers; values change on every run
a = np.random.randint(low=0, high=10, size=(5, 5))
a

array([[0, 7, 7, 2, 8],
       [9, 1, 8, 7, 8],
       [1, 9, 1, 6, 2],
       [9, 4, 5, 4, 4],
       [0, 7, 8, 6, 7]])

In [34]:
# Basic slicing is an extension of Python's basic concept of slicing to n dimensions.
# A Python slice object is constructed by giving start, stop, and step parameters to the built-in slice function.
# formula --> [rows, columns] --> [start:stop:step , start:stop:step], remember the stop is exclusive and the start is inclusive
# [j,:] --> jth row, all columns
# [:,j] --> jth column, all rows

# get first row
print("First row")
# a single index selects along the first axis (rows) ...
b = a[0]
print(b)
print('\n')
# ... and gives the same result as spelling out "all columns"
b = a[0,:]
print(b)
print('\n')

# get first column
b = a[:,0]
print("First col")
print(b)
print('\n')

First row
[0 7 7 2 8]


[0 7 7 2 8]


First col
[0 9 1 9 0]




In [39]:
# a larger 10x10 random matrix gives more room for slicing
a = np.random.randint(0, 10, size=(10, 10))
print("a mat:")
print(a, end='\n\n\n')

# rows 0, 1, 2 with every column
print("First 3 rows:")
b = a[0:3, :]
print(b, end='\n\n\n')

# every row, columns 0, 1, 2
print("First 3 columns")
b = a[:, 0:3]
print(b, end='\n\n\n')

# a step of 2 keeps every second row; stop=7 is exclusive, leaving rows 0, 2, 4, 6
print("All even rows until 7:")
b = a[0:7:2, :]
print(b, end='\n\n\n')

a mat:
[[2 4 9 8 1 5 8 5 2 3]
 [2 9 8 7 5 1 8 1 5 6]
 [0 5 1 7 9 1 5 8 4 0]
 [6 9 1 0 7 0 1 4 0 3]
 [6 9 6 2 1 4 7 4 9 5]
 [8 3 9 1 6 6 6 0 0 5]
 [2 1 3 1 3 8 6 5 3 2]
 [5 3 4 5 3 1 3 5 9 4]
 [7 2 2 2 4 9 0 5 0 0]
 [0 5 5 5 1 2 1 1 0 2]]


First 3 rows:
[[2 4 9 8 1 5 8 5 2 3]
 [2 9 8 7 5 1 8 1 5 6]
 [0 5 1 7 9 1 5 8 4 0]]


First 3 columns
[[2 4 9]
 [2 9 8]
 [0 5 1]
 [6 9 1]
 [6 9 6]
 [8 3 9]
 [2 1 3]
 [5 3 4]
 [7 2 2]
 [0 5 5]]


All even rows until 7:
[[2 4 9 8 1 5 8 5 2 3]
 [0 5 1 7 9 1 5 8 4 0]
 [6 9 6 2 1 4 7 4 9 5]
 [2 1 3 1 3 8 6 5 3 2]]




In [41]:
print(a)
print('\n')

# slicing both rows and columns, 2-4 rows, 4-6 columns
# (row indices 2,3,4 and column indices 4,5,6 - the stop index is exclusive)
b = a[2:5,4:7]
print(b)
print('\n')

[[2 4 9 8 1 5 8 5 2 3]
 [2 9 8 7 5 1 8 1 5 6]
 [0 5 1 7 9 1 5 8 4 0]
 [6 9 1 0 7 0 1 4 0 3]
 [6 9 6 2 1 4 7 4 9 5]
 [8 3 9 1 6 6 6 0 0 5]
 [2 1 3 1 3 8 6 5 3 2]
 [5 3 4 5 3 1 3 5 9 4]
 [7 2 2 2 4 9 0 5 0 0]
 [0 5 5 5 1 2 1 1 0 2]]


[[9 1 5]
 [7 0 1]
 [1 4 7]]




In [45]:
# last, we can use masking to slice only values we prefer
threshold = 5
# a > threshold is evaluated element-wise; indexing a with the result
# returns the matching values as a flat 1D array
b = a[a > threshold]
print(b)

# we can get their index a bit differently
# argwhere returns one [row, column] pair per matching element
b = np.argwhere(a > threshold)
print(b)

[9 8 8 9 8 7 8 6 7 9 8 6 9 7 6 9 6 7 9 8 9 6 6 6 8 6 9 7 9]
[[0 2]
 [0 3]
 [0 6]
 [1 1]
 [1 2]
 [1 3]
 [1 6]
 [1 9]
 [2 3]
 [2 4]
 [2 7]
 [3 0]
 [3 1]
 [3 4]
 [4 0]
 [4 1]
 [4 2]
 [4 6]
 [4 8]
 [5 0]
 [5 2]
 [5 4]
 [5 5]
 [5 6]
 [6 5]
 [6 6]
 [7 8]
 [8 0]
 [8 5]]


#### a few more helpful functions

In [54]:
# argmax --> index of the largest value in an array (argmin: of the smallest);
# if the value appears several times, the index of the first occurrence is returned
a = np.array([1, 5, 10, -5, 2, 5, 3, 8, 9])
ind = a.argmax()
print("max num is {}, and its index is {}".format(a[ind], ind))

max num is 10, and its index is 2


In [64]:
# random:
# randn   --> samples from the standard normal dist (mean 0, std 1)
# randint --> integers drawn uniformly from low (included) up to high (excluded)
# normal  --> samples from a normal dist with mean mu and std sig
print(np.random.randn(5), end='\n\n\n')
print(np.random.randn(3, 3), end='\n\n\n')
print('\n')
print(np.random.randint(1, 5, (2, 2)), end='\n\n\n')
# with only 25 samples the measured mean / std land near 5 / 3, not exactly on them
a = np.random.normal(5, 3, (5, 5))
print(a)
print("Mean of matrix: {} , std: {}".format(a.mean(), a.std()))

[0.36566407 0.59506159 0.02786914 0.638916   1.2986707 ]


[[ 0.38970056 -0.26559659  0.45096654]
 [ 0.79472875  1.46479721  0.33845621]
 [-0.9285985  -0.60275463  1.69269851]]




[[4 4]
 [2 1]]


[[ 7.12649051  6.8725381   2.61264774 -2.45630311  4.84418785]
 [ 3.41737562  6.91881043  5.95050592  4.13830292  3.70179336]
 [ 6.1006345   5.44572822  7.31827298  9.26736336  3.69707419]
 [ 3.94222041  2.82717827  8.95682957  2.91441223  8.09479603]
 [ 8.20582927  5.7222493  -0.8098864  -1.48735796  0.25064568]]
Mean of matrix: 4.542893559449478 , std: 3.1302085956742833
