# SD212: Graph mining

# Check your skills in Python & numpy

This notebook presents some basic data structures of Python and some operations on numpy arrays.

Recall that you can use:
* `tab` for completion
* ? for inline help
* ?? for inline code (when available)

In [1]:
# inline help
sorted?

[0;31mSignature:[0m [0msorted[0m[0;34m([0m[0miterable[0m[0;34m,[0m [0;34m/[0m[0;34m,[0m [0;34m*[0m[0;34m,[0m [0mkey[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mreverse[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Return a new list containing all items from the iterable in ascending order.

A custom key function can be supplied to customize the sort order, and the
reverse flag can be set to request the result in descending order.
[0;31mType:[0m      builtin_function_or_method

In [2]:
# your code
def add(x, y):
    '''Add two numbers.'''
    return x + y

In [3]:
add?

[0;31mSignature:[0m [0madd[0m[0;34m([0m[0mx[0m[0;34m,[0m [0my[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m Add two numbers.
[0;31mFile:[0m      /tmp/ipykernel_5959/1506788103.py
[0;31mType:[0m      function

In [4]:
add??

[0;31mSignature:[0m [0madd[0m[0;34m([0m[0mx[0m[0;34m,[0m [0my[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
[0;32mdef[0m [0madd[0m[0;34m([0m[0mx[0m[0;34m,[0m [0my[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m    [0;34m'''Add two numbers.'''[0m[0;34m[0m
[0;34m[0m    [0;32mreturn[0m [0mx[0m [0;34m+[0m [0my[0m[0;34m[0m[0;34m[0m[0m
[0;31mFile:[0m      /tmp/ipykernel_5959/1506788103.py
[0;31mType:[0m      function

## Import

In [5]:
import numpy as np

## List

In [6]:
names = ['Alice', 'Bernard', 'Carole', 'David']

In [7]:
names[:2]

['Alice', 'Bernard']

In [8]:
names[-2:]

['Carole', 'David']

In [9]:
names.append('Elodie')

In [10]:
names += ['Ferdinand', 'Gabrielle']

In [11]:
names

['Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle']

In [12]:
names_sublist = [name for name in names if 'i' not in name]

In [13]:
names_sublist

['Bernard', 'Carole']

## Set

In [14]:
set(names)

{'Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle'}

In [15]:
set(names_sublist) <= set(names)

True

In [16]:
set(names) - set(names_sublist)

{'Alice', 'David', 'Elodie', 'Ferdinand', 'Gabrielle'}

In [17]:
names += ['Alice']

In [18]:
names

['Alice',
 'Bernard',
 'Carole',
 'David',
 'Elodie',
 'Ferdinand',
 'Gabrielle',
 'Alice']

In [19]:
len(names)

8

In [20]:
len(set(names))

7

In [21]:
a = set(names)

In [22]:
b = {'Hector', 'Irina'}

In [23]:
a & b

set()

In [24]:
a | b

{'Alice',
 'Bernard',
 'Carole',
 'David',
 'Elodie',
 'Ferdinand',
 'Gabrielle',
 'Hector',
 'Irina'}

In [25]:
b.add('Alice')

In [26]:
a & b

{'Alice'}

## Dictionary

In [27]:
len(names)

8

In [28]:
length = {name: len(name) for name in names}

In [29]:
len(length)

7

In [30]:
list(length.keys())

['Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle']

In [31]:
list(length.values())

[5, 7, 6, 5, 6, 9, 9]

In [32]:
for name in length:
    print(name, length[name])

Alice 5
Bernard 7
Carole 6
David 5
Elodie 6
Ferdinand 9
Gabrielle 9


In [33]:
length_to_add = {name: len(name) for name in b}

In [34]:
length.update(length_to_add)

In [35]:
for name in length:
    print(name, length[name])

Alice 5
Bernard 7
Carole 6
David 5
Elodie 6
Ferdinand 9
Gabrielle 9
Irina 5
Hector 6


## Numpy

In [36]:
x = np.array([5, 1, 2, 4])

In [37]:
y = np.zeros(5)

In [38]:
z = np.zeros_like(x, dtype=int)

In [39]:
x[:2]

array([5, 1])

In [40]:
x[-2:]

array([2, 4])

In [41]:
np.sort(x)

array([1, 2, 4, 5])

In [42]:
np.argsort(x)

array([1, 2, 3, 0])

In [43]:
np.argsort(-x)

array([0, 3, 2, 1])

In [44]:
A = np.arange(12).reshape(3, -1)

In [45]:
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [46]:
A.shape

(3, 4)

In [47]:
A.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [48]:
A.sum(axis = 0)

array([12, 15, 18, 21])

In [49]:
A.sum(axis = 1)

array([ 6, 22, 38])

In [50]:
A.dot(x)

array([ 17,  65, 113])

In [51]:
A[:2]

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [52]:
A[:2,1:]

array([[1, 2, 3],
       [5, 6, 7]])

In [53]:
a = np.arange(4)
b = np.arange(5)
np.outer(a, b)

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [54]:
index = np.array([0, 1, 1, 2, 0])

In [55]:
x[index]

array([5, 1, 1, 2, 5])

In [56]:
np.unique(x[index])

array([1, 2, 5])

In [57]:
np.unique(x[index], return_counts=True)

(array([1, 2, 5]), array([2, 1, 2]))

In [58]:
x = np.arange(-3, 10)

In [59]:
x > 0

array([False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [60]:
np.sum(x > 0)

9

In [61]:
x[x > 0]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [62]:
index = np.where(x > 0)[0]

In [63]:
x[index]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [64]:
x.nonzero()[0]

array([ 0,  1,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [65]:
len(x.nonzero()[0])

12

In [66]:
x[x.nonzero()]

array([-3, -2, -1,  1,  2,  3,  4,  5,  6,  7,  8,  9])

In [67]:
A = np.random.randint(3, size=(5, 7))

In [68]:
A > 1

array([[False, False, False, False,  True, False,  True],
       [False, False, False, False,  True, False,  True],
       [ True, False,  True, False, False, False,  True],
       [ True, False,  True, False, False, False,  True],
       [False,  True,  True,  True, False, False, False]])

In [69]:
A[A > 1]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [70]:
A.nonzero()

(array([0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4,
        4]),
 array([2, 4, 6, 0, 1, 4, 5, 6, 0, 1, 2, 3, 6, 0, 2, 4, 6, 0, 1, 2, 3, 4,
        6]))

In [71]:
A[A.nonzero()]

array([1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1,
       1])

In [72]:
np.repeat(np.arange(5), 4)

array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4])

In [73]:
np.add.reduceat(np.arange(10), [0, 3, 7])

array([ 3, 18, 24])

In [74]:
np.add.reduceat(np.arange(12).reshape(4, -1), [0, 2, 3])

array([[ 3,  5,  7],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [75]:
np.add.reduceat(np.arange(12).reshape(4, -1), [0, 2], axis=1)

array([[ 1,  2],
       [ 7,  5],
       [13,  8],
       [19, 11]])

In [76]:
np.random.choice(6)

2

In [77]:
np.random.choice([1,3,6], size=4)

array([3, 1, 1, 1])

In [78]:
np.random.choice([1,3,6], p=[0.1, 0.8, 0.1], size=4)

array([3, 1, 3, 3])

In [79]:
A = np.random.randint(3, size=(5, 7))

In [80]:
np.save("matrix_data", A)

In [81]:
B = np.load("matrix_data.npy")

In [82]:
np.all(A == B)

True