# Data Structure

## Tuple: a fixed-length immutable data container/collection

### Convert any sequence or iterator to a tuple by using the *tuple* function.

In [2]:
# Build a list, convert it with the built-in tuple() constructor,
# and show the type of each object.
list_a = [1, 2, 3]
tuple_a = tuple(list_a)
print(type(list_a), type(tuple_a), sep="\n")

<class 'list'>
<class 'tuple'>


### The *Unpacking* property of tuple

In [19]:
# Unpacking in an assignment: the tuple (1, 2) on the right is spread over a and b.
(a, b) = (1, 2)
print(f"the value of a is {a}; the value of b is {b}")

# Unpacking when swapping values: no temporary variable is needed.
(a, b) = (b, a)
print(f"the value of a is {a}; the value of b is {b}")

the value of a is 1; the value of b is 2
the value of a is 2; the value of b is 1


In [21]:
# Unpacking inside a for loop: every 3-tuple in seq is spread over a, b and c.
seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for (a, b, c) in seq:
    print(f"a = {a}, b = {b}, c = {c}")

a = 1, b = 2, c = 3
a = 4, b = 5, c = 6
a = 7, b = 8, c = 9


In [23]:
# Starred unpacking: the starred name collects all leftover items into a list.
values = (1, 2, 3, 4, 5)
a, b, *rest = values
print(f"a is {a}, b is {b}, and the rest is {rest}")
# Any name can carry the star; "_" is the conventional choice for leftovers
# that are not going to be used.
a, b, *_ = values
print(f"a is {a}, b is {b}, and the rest is {_}")

a is 1, b is 2, and the rest is [3, 4, 5]
a is 1, b is 2, and the rest is [3, 4, 5]


### Immutability 
The elements of a tuple cannot be reassigned, but this immutability is shallow: whether an element itself can be modified depends entirely on that element's own type (for example, a list stored inside a tuple can still be changed in place).

In [11]:
# A tuple mixing an immutable str, a mutable list and a bool.
tup = "foo", [1, 2], True
print("The original tuple is ", tup)

The original tuple is  ('foo', [1, 2], True)


In [12]:
print("The element of the tuple is not modifiable:")
# Assigning to a slot of a tuple raises TypeError. The error is the point of
# this cell, so it is caught and displayed instead of being left uncaught,
# which would stop a "Restart & Run All" at this cell.
try:
    tup[1] = [1, 2, 3]
except TypeError as err:
    print("TypeError: {0}".format(err))

The element of the tuple is not modifiable:


TypeError: 'tuple' object does not support item assignment

In [13]:
print("The elemenet of the element of the tuple may be mutable:")
# tup[1] is a list, and lists are mutable, so it can be changed in place even
# though the slot of the tuple that holds it cannot be reassigned.
inner_list = tup[1]
inner_list.append(3)
print(tup)

The elemenet of the element of the tuple may be mutable:
('foo', [1, 2, 3], True)


In [14]:
print("The element of the element of the tuple may be immutable:")
# tup[0] is a string, and strings are immutable, so item assignment raises
# TypeError. The error is caught and displayed so that the demonstration does
# not stop a "Restart & Run All" at this cell.
try:
    tup[0][0] = "s"
except TypeError as err:
    print("TypeError: {0}".format(err))

The element of the element of the tuple may be immutable:


TypeError: 'str' object does not support item assignment

### The "+" and "*" operators work on tuples as they do on strings: concatenation and repetition

In [18]:
# "*" repeats the tuple and "+" concatenates two tuples.
print(tup * 3)
print(tup + ("hello", "world"))
# "+" requires a tuple on both sides; adding a str raises TypeError. The error
# is caught and displayed so the notebook keeps running past this cell.
try:
    print(tup + "hello")
except TypeError as err:
    print("TypeError: {0}".format(err))

('foo', [1, 2, 3], True, 'foo', [1, 2, 3], True, 'foo', [1, 2, 3], True)
('foo', [1, 2, 3], True, 'hello', 'world')


TypeError: can only concatenate tuple (not "str") to tuple

### Tuple method
Since the size and content of a tuple cannot be changed, tuples have very few methods; the most commonly used one is *count*, which counts the number of occurrences of a value.

In [24]:
# count() returns how many times the given value occurs in the tuple.
a = (1, 2, 2, 2, 3, 4, 2)
a.count(2)

4

## List: a variable-length and mutable data collection

### Convert any collection into list by function *list()*

In [27]:
# Build a tuple, convert it with the built-in list() constructor,
# and show the type of each object.
tup = (2, 3, 4, "foo")
list_t = list(tup)
print(type(tup), type(list_t), sep="\n")

<class 'tuple'>
<class 'list'>


### Mutability

#### Adding or removing elements

In [35]:
# Despite its name, tup_a is a list, so it can grow and shrink in place.
tup_a = ["foo", "peekaboo", "baz"]
print(tup_a)

print("\n Append an element:")
tup_a.append("dwarf")        # add at the end
print(tup_a)

print("\n Insert an element:")
tup_a.insert(1, "red")       # place "red" at index 1, shifting the rest right
print(tup_a)

print("\n pop an elemnt:")
popped = tup_a.pop(2)        # remove the item at index 2 and return it
print(popped)
print(tup_a)

print("\n remove an element:")
tup_a.append("foo")
print("Before removing:", tup_a)
tup_a.remove("foo")          # removes only the first matching value
print("After removing:", tup_a)

['foo', 'peekaboo', 'baz']

 Append an element:
['foo', 'peekaboo', 'baz', 'dwarf']

 Insert an element:
['foo', 'red', 'peekaboo', 'baz', 'dwarf']

 pop an elemnt:
peekaboo
['foo', 'red', 'baz', 'dwarf']

 remove an element:
Before removing: ['foo', 'red', 'baz', 'dwarf', 'foo']
After removing: ['red', 'baz', 'dwarf', 'foo']


#### Concatenating and combining lists

In [41]:
# "+" concatenates two lists into a brand-new list object; the operands are
# left unchanged, which the three different ids confirm.
list_1 = [1, 2, 3]
list_2 = ["a", "b", "c"]
list_12 = list_1 + list_2
print(list_12)
print(f" id of list_1 is {id(list_1)} \n id of list_2 is {id(list_2)} \n id of list_12 is {id(list_12)}")

[1, 2, 3, 'a', 'b', 'c']
 id of list_1 is 2891844036680 
 id of list_2 is 2891849046792 
 id of list_12 is 2891848362056


In [56]:
# list.extend() appends every item of list_2 to list_1 in place, so no new
# list object is created and id(list_1) stays the same.
# Because list_1 is mutated, running this cell again extends it once more.
list_1.extend(list_2)
print(f"the new list_1 is {list_1} \n the id of the new list_1 is {id(list_1)}")

the new list_1 is [1, 2, 3, 'a', 'b', 'c'] 
 the id of the new list_1 is 2891844036680


### Sorting

#### list.sort(): sorts a list in place without *creating a new list object*; it returns None instead of a list and simply changes the original list.

In [54]:
# sort() reorders the existing list object, so the id printed before and
# after sorting is the same.
a = [7, 2, 5, 6, 1]
print(a)
print(f"id of a {id(a)}")
print("sorting:")
a.sort()
print(a)
print(f"id of a {id(a)}")

[7, 2, 5, 6, 1]
id of a 2891848890120
sorting:
[1, 2, 5, 6, 7]
id of a 2891848890120


Pass a *sort* key: a function that is applied to each element to produce the value used for ordering.

In [52]:
# key=len orders the strings by their length; elements of equal length keep
# their original relative order.
b = ["saw", "small", "He", "foxes", "six"]
print(b)
print("sorting based on the length of the element:")
b.sort(key=len)
print(b)

['saw', 'small', 'He', 'foxes', 'six']
sorting based on the length of the element:
['He', 'saw', 'six', 'small', 'foxes']


### Slicing []

In [89]:
# Slicing trick: the value after the second colon is the step, and a step of
# -1 walks the sequence backwards, giving a reversed copy.
seq = list(range(7))
seq[::-1]


[6, 5, 4, 3, 2, 1, 0]


### Built-in Sequence Functions

#### enumerate(): returns an iterator of (*index, value*) tuples

In [71]:
some_list = ["foo", "baz", "bar"]
# enumerate(some_list) yields one (index, value) tuple per item; tuple
# unpacking spreads each of them over i and v.
for i, v in enumerate(some_list):
    print(f"The value at the index {i} of the list is {v}")

The value at the index 0 of the list is foo
The value at the index 1 of the list is baz
The value at the index 2 of the list is bar


#### sorted(): returns a new sorted list object from the elements of any iterable

In [92]:
# sorted() leaves a untouched and returns a new list, so the two ids differ.
a = [2, 3, 1, 5, 56, 1]
print(sorted(a))
print(f" id of a is {id(a)}, \n id of sorted(a) is {id(sorted(a))}")

[1, 1, 2, 3, 5, 56]
 id of a is 2891818589832, 
 id of sorted(a) is 2891847147976


In [67]:
# A string is also a kind of sequence, so sorted() returns its characters as a sorted list.
sorted("horse race")

[' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']

#### zip(): pairs up the elements of a number of lists, tuples, or other sequences and returns an iterator of tuples (wrap it in *list()* to get a list)

In [70]:
seq1 = ["foo", "bar", "baz"]
seq2 = ["one", "two", "three"]
# zip() returns a lazy zip object, not a list; list() consumes it to show the pairs.
list_zipped = zip(seq1, seq2)
print(list_zipped, type(list_zipped), sep="\n")
print(list(list_zipped))

<zip object at 0x000002A14F5B3A08>
<class 'zip'>
[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]


In [72]:
# enumerate() pairs each zipped tuple with its position; the nested pattern
# unpacks the index and the pair in one step.
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print(f"the index {i} points to the value {a} and {b}")

the index 0 points to the value foo and one
the index 1 points to the value bar and two
the index 2 points to the value baz and three


In [75]:
# Sequences of different lengths: zip() stops as soon as the shortest input
# is exhausted, so only three pairs are produced.
seq1 = ["foo", "bar", "baz"]
seq2 = ["one", "two", "three", "four", "five", "six"]
list_zipped = zip(seq1, seq2)
print(list_zipped, type(list_zipped), sep="\n")
print(list(list_zipped))

<zip object at 0x000002A14F50CE08>
<class 'zip'>
[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]


Clever way to **unzip** using <u>zip(*)</u>, which returns tuples

In [78]:
pichters = [("Nolan", "Ryan"), ("Roger", "Clemens"), ("Schilling", "Curt")]
print(pichters)
# The star passes each pair as a separate argument to zip(), which then
# regroups the first items together and the second items together.
first_names, last_names = zip(*pichters)
print(first_names, last_names, sep="\n")

[('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Schilling', 'Curt')]
('Nolan', 'Roger', 'Schilling')
('Ryan', 'Clemens', 'Curt')


## Dictionary: "hash map" or "associative array", a flexibly sized collection of *key-value* pairs

### Create a dictionary:

In [80]:
# A dict literal: comma-separated key: value pairs inside curly braces.
di = {
    "a": "some value",
    "b": [1, 2, 3, 5],
}
di

{'a': 'some value', 'b': [1, 2, 3, 5]}

In [95]:
# Assigning to a new key inserts a pair; assigning to an existing key replaces
# its value. Only the last expression of a cell is displayed, so the dict is
# shown once, after both assignments.
di['c'] = "hello world"
di['b'] = [11, 22, 33, 44]
di

{'a': 'some value', 'b': [11, 22, 33, 44], 'c': 'hello world'}

A dict can be built from key-value pairs, so the function *dict* accepts any sequence of 2-tuples.

In [102]:
# Each inner 2-tuple supplies one (key, value) pair of the new dict.
tup2 = (
    ("one", 1),
    ("two", 2),
    ("three", 3),
)
num_dict = dict(tup2)
num_dict

{'one': 1, 'two': 2, 'three': 3}

In [97]:
# zip() pairs 0..4 with 4..0, and dict() turns those pairs into a mapping.
mapping = dict(
    zip(
        range(5),
        reversed(range(5)),
    )
)
mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

### Get keys and values
The *keys* and *values* methods return view objects over the dict's keys and values, respectively; they can be iterated over or converted to a list.

In [94]:
# keys() and values() return view objects (printed as dict_keys / dict_values);
# list() turns them into plain lists.
print(di.keys(), list(di.keys()), list(di.values()), sep="\n")

dict_keys(['a', 'b', 'c'])
['a', 'b', 'c']
['some value', [11, 22, 33, 44], 'hello world']


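### Default value methods: dict.setdefault(), dict.get() and dict.pop()
* dict.setdefault(key, default): returns the value for the key, first inserting the default if the key is missing (useful when adding key-value pairs)
* dict.get(key, default): returns the default when looking up a key that is missing
* dict.pop(key, default): returns the default when removing a key that is missing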

In [105]:
words = ["apple", "bat", "bar", "atom", "book"]
letter_dict = {}

## traditional approach: test for the key by hand before appending
for i in words:
    letter = i[0]
    if letter in letter_dict:
        letter_dict[letter].append(i)
    else:
        # start a new list for this letter; [i] (not i) so the value is a list, not a str
        letter_dict[letter] = [i]
print(letter_dict)

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}


In [110]:
## using the dict.setdefault approach
letter_dict = {}
for i in words:
    letter = i[0]
    # setdefault returns the list already stored under this letter, or stores
    # a new empty list first when the letter has not been seen yet
    bucket = letter_dict.setdefault(letter, [])
    bucket.append(i)
print(letter_dict)
print(letter_dict.get('c', "no key as c"))

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}
no key as c


## Set: an unordered collection of unique elements
Intuitively, they are like dictionaries that have only keys and no values.

### Two ways to create a set

In [111]:
# Way 1: pass a single iterable to set(); duplicate values are dropped.
set([1, 2, 3, 4, 1, 2, 3, 4])

{1, 2, 3, 4}

In [114]:
# Way 2: a set literal in curly braces; duplicate values are dropped.
{1, 2, 3, 4, 1, 2, 3, 4}

{1, 2, 3, 4}

In [115]:
# set() accepts at most one argument (an iterable), so passing the elements
# positionally raises TypeError. The error is the point of this cell; it is
# caught and displayed so the notebook keeps running past it.
try:
    set(1, 2, 3, 4, 1, 2, 3, 4)
except TypeError as err:
    print("TypeError: {0}".format(err))

TypeError: set expected at most 1 arguments, got 8

### Math operations

In [117]:
# Two overlapping sets used by the set-operation cells.
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}


In [122]:
# Make a separate set with the same elements, so changing c leaves a untouched.
c = set(a)

In [121]:
# "|=" is the in-place union: every element of b is added to c, while a,
# from which c was copied, is not affected.
c |= b
print(f"set c: {c}")
print(f"set a: {a}")

set c: {1, 2, 3, 4, 5, 6, 7, 8}
set a: {1, 2, 3, 4, 5}


In [130]:
alist = [1, 2, 3, 4, 4]
# A list is mutable and therefore unhashable, so it cannot be a set element;
# converting it to a tuple first gives a hashable element.
as_tuple = tuple(alist)
aset = {as_tuple}
print(aset)

{(1, 2, 3, 4, 4)}


In [131]:
# A list is unhashable, so using it as a set element raises TypeError. The
# error is the point of this cell; it is caught and displayed so the notebook
# keeps running past it.
try:
    aset = {alist}
except TypeError as err:
    print("TypeError: {0}".format(err))

TypeError: unhashable type: 'list'

## Tuple, List, Set, Dict comprehensions

In [132]:
# Sample words of increasing length used by the comprehension cells.
strings = ["a", "as", "bat", "car", "dove", "python"]

In [135]:
x.upper() for x in strings if len(x) >= 3  # intentionally invalid: without enclosing brackets this is a SyntaxError

SyntaxError: invalid syntax (<ipython-input-135-223b50636b0d>, line 1)

In [136]:
# List comprehension: upper-case every string that has at least 3 characters.
[word.upper() for word in strings if len(word) >= 3]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [137]:
# A list comprehension can iterate over a tuple too; the result is still a list.
strings_tup = ("a", "as", "bat", "car", "dove", "python")
[word.upper() for word in strings_tup if len(word) >= 3]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [139]:
# Set comprehension: curly braces around single items build a set of (index, value) tuples.
{(index, word) for index, word in enumerate(strings)}

{(0, 'a'), (1, 'as'), (2, 'bat'), (3, 'car'), (4, 'dove'), (5, 'python')}

In [140]:
# Dict comprehension: curly braces around key: value items build a dict mapping index to value.
{index: word for index, word in enumerate(strings)}

{0: 'a', 1: 'as', 2: 'bat', 3: 'car', 4: 'dove', 5: 'python'}

Nested comprehensions over a nested list or tuple

In [None]:
some_tuples = ((1, 2, 3), (4, 5, 6), (7, 8, 9))
# Nested comprehension: the for clauses read left to right like nested loops,
# so the outer loop walks the inner tuples and the inner loop walks their items.
# The generator expression is wrapped in tuple() so the flattened result is
# materialised and displayed instead of a lazy generator object.
flattened = tuple(i for subtup in some_tuples for i in subtup)
flattened