In [None]:
# tuples, lists, dicts, and sets 
# tuples, lists, dicts are most commonly used sequence type

In [1]:
a = [1,2,3]
type(a)

list

In [5]:
tup = 4,5,6
tup = tuple('string')
tup

('s', 't', 'r', 'i', 'n', 'g')

In [7]:
nested_tup = (4,5,6), (7,8)
nested_tup

((4, 5, 6), (7, 8))

In [10]:
nested_tup[0]

(4, 5, 6)

In [13]:
# if an object inside a tuple is mutable, you can modify it in place 
tup = tuple(['foo', [1,2], True])

In [14]:
tup[1].append(3)

In [15]:
tup

('foo', [1, 2, 3], True)

In [16]:
# you can concatenate tuples using +, to produce longer tuple
(4, None, 'foo') + (6, 0) + ('bar',)

(4, None, 'foo', 6, 0, 'bar')

In [17]:
('foo', 'bar') * 4
# note that the objects themselves are not copied, only the references to them, see below 

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

In [1]:
a = [1,2]
b = 3
c = (a, b) * 4 
c 

([1, 2], 3, [1, 2], 3, [1, 2], 3, [1, 2], 3)

In [2]:
a[1] = 3
c

([1, 3], 3, [1, 3], 3, [1, 3], 3, [1, 3], 3)

In [21]:
a.append(5)
c 

([1, 2, 5], 3, [1, 2, 5], 3, [1, 2, 5], 3, [1, 2, 5], 3)

In [22]:
# Unpacking tuples 
tup = (4,5,6) 
a,b,c = tup
b

5

In [23]:
tup = 4,5, (6,7)
a,b,(c,d) = tup 
c

6

In [None]:
# sway variable name 
tmp = a 
a = b 
b = tmp 


In [27]:
# in python, using tuple, it can be done like that 
a, b = 1,2 
print(a,b)

1 2


In [28]:
b,a = a,b 
print(a,b)

2 1


In [29]:
seq = [(1,2,3), (4,5,6), (7,8,9)]
for a,b,c in seq: 
    print(f'a={a}, b={b}, c={c}')

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [None]:
# another common use is returning multiple values from a function. 

In [33]:
# *rest 
values = 1,2,3,4,5 
a, b, *c = values

In [34]:
# *_, many programmers use _ for unwanted variables 
values = 1,2,3,4,5 
a, b, *_ = values

[3, 4, 5]

In [35]:
# tuple methods 
values.count(2)

1

# List

In [None]:
# Lists and tuples are semantically similar and can be interchanged in many functions 

In [36]:
# list is frequently used in materialize and interator or generator expression

a = range(10) 
list(a)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [46]:
b_list = ['foo', 'peekaboo', 'baz', 'dwarf']
print(f"id:{id(b_list[2])}, var:{b_list[2]}")
print(f"b_list id: {id(b_list)}")
b_list.insert(1, 'hello')
print(f"id:{id(b_list[2])}, var:{b_list[2]}")
b_list
print(f"b_list id: {id(b_list)}")

# insert is inefficient compared to append 

id:1759002339952, var:baz
b_list id: 1759003857920
id:1759003774384, var:peekaboo
b_list id: 1759003857920


In [47]:
"dwarf" in b_list
# check whether a list contains a value is a lot slower than doing so with dictionaries 
# and sets, as Python makes a linear scan across the values of the list, whereas it can 
# check the others (based on hash tables) in constant time 

True

In [51]:
x = [4, None, "foo"]
x.extend([1,2,3])
x

# extend is preferable for the below 
# everything = []
# for chunk in list_of_lists: 
#     everything.extend(chunk)

# everything = []
# for chunk in list_of_lists: 
#     everything = everything + chunk 


[4, None, 'foo', 1, 2, 3]

In [53]:
# Sorting 
a = [7, 2, 5, 1, 3]
a.sort()


[1, 2, 3, 5, 7]

In [66]:
b = ["saw", "small", "He", "foxes", "six"]
b.sort(key=len)
c = b.copy() 
print(c)

['He', 'saw', 'six', 'small', 'foxes']


In [76]:
seq = [7, 2, 3, 6, 3, 6, 0, 1]
seq[-1::-1]

[1, 0, 6, 3, 6, 3, 2, 7]

In [70]:
seq = "Hello!"
seq[-1:]

'!'

# Dictionary 

In [77]:
# most important built-in type 
empty_dict = {} 
d1 = {"a": "some value", "b": [1,2,3,4]}
d1 

{'a': 'some value', 'b': [1, 2, 3, 4]}

In [78]:
d1[7] = "an integer"
d1["b"]

[1, 2, 3, 4]

In [80]:
"b" in d1 # check "b" is a key of dict d1 or not 
7 in d1 

True

In [82]:
d1[5] = "some value"
d1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer', 5: 'some value'}

In [83]:
d1["dummy"] = "another value"
d1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 5: 'some value',
 'dummy': 'another value'}

In [84]:
del d1[5]

In [86]:
ret = d1.pop("dummy")

In [89]:
ret
d1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [91]:
# keys and values method gives you iterators of the dictionary's keys and values 
list(d1.keys())

['a', 'b', 7]

In [92]:
list(d1.items()) # allowing iterating both keys and values via 2-tuples 

[('a', 'some value'), ('b', [1, 2, 3, 4]), (7, 'an integer')]

In [95]:
# merge dictionary via merge method 
d1.update({"b": "foo", "c": 12}) # changes the dict in place 
d1 

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

In [119]:
# create dict from seq 
tuples = zip(range(5), reversed(range(5)))
mapping = dict(tuples)
mapping 

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

In [124]:
tuples = zip(range(5), reversed(range(5)))
list(tuples)
print(list(tuples)) # iterator has been exhausted ... 

<zip at 0x1998929a540>

In [125]:
mapping 

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

In [126]:
# default value, get and pop
words = ["apple", "bat", "bar", "atom", "book"]

by_letter = {}

for word in words: 
    letter = word[0]
    if letter not in by_letter: 
        by_letter[letter] = [word]
    else:
        by_letter[letter].append(word)



In [127]:
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

In [131]:
from collections import defaultdict

by_letter = defaultdict(list)

# you pass a type or function for generating the default value for each slot in the dictionary:

for word in words: 
    by_letter[word[0]].append(word)

In [134]:
by_letter

defaultdict(list, {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']})

In [135]:
# Valid dict key types 
# hashable, they generally need to be immutable objects 

d = {} 
d[tuple([1,2,3])] = 5
d

{(1, 2, 3): 5}

In [137]:
# Set 
set([2,2,2,1,3,3])
{2,2,2,1,2,3}

{1, 2, 3}

In [151]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}
a.union(b)

a|b # same as union 
a&b # same as intersection

{3, 4, 5}

In [152]:
a.isdisjoint(b)

False

In [153]:
c = a.copy()
c |= b
c

In [156]:
d = a.copy() 
d &= b 
d

{3, 4, 5}

In [None]:
# Built in sequence function 
# for index, value in enumerate(collection):

In [158]:
# zip
seq1 = ["foo", "bar", "baz"]
seq2 = ["one", "two", "three"]
for index, (a, b) in enumerate(zip(seq1, seq2)):
    print(f"{index}: {a}, {b}")

0: foo, one
1: bar, two
2: baz, three


In [None]:
# dict_comp = {key-expr: value-expr for value in collection
#              if condition}

In [159]:
strings = ["a", "as", "bat", "car", "dove", "python"]
unique_lengths = {len(x) for x in strings}

In [161]:
set(map(len, strings))

{1, 2, 3, 4, 6}

In [163]:
# simple lookup map for their index 
loc_mapping = {value: index for index, value in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

In [168]:
all_data = [["John", "Emily", "Michael", "Mary", "Steven"],
    ["Maria", "Juan", "Javier", "Natalia", "Pilar"]]

names_of_interest = []
for names in all_data:
    enough_as = [name for name in names if name.count('a') >= 2]
    names_of_interest.extend(enough_as)

names_of_interest

['Maria', 'Natalia']

In [170]:
# nested list comprehension 
result = [name for names in all_data for name in names if name.count('a') >= 2]
result

['Maria', 'Natalia']

In [171]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattend = [x for tup in some_tuples for x in tup]
flattend 

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [179]:
def func():
    for i in range(5):
        a.append(i)

In [180]:
a = []
func()
a

[0, 1, 2, 3, 4]

In [190]:
states = ["   Alabama ", "Georgia!", "Georgia", "georgia", "FlOrIda",
           "south   carolina##", "West virginia?"]

In [182]:
import re 

def clean_strings(strings): 
    result = [] 
    for value in strings: 
        value = value.strip()
        value = re.sub("[!#?]", "", value)
        value = value.title() 
        result.append(value) 
    return result  

In [185]:
clean_strings(states)

['[Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [186]:
def remove_puncutation(value):
    return re.sub("[!#?]", "", value)

clean_ops = [str.strip, remove_puncutation, str.title]

def clean_strings(strings, ops):
    result = []
    for value in strings: 
        for func in ops:
            value = func(value) 
        result.append(value)
    return result    

In [191]:
clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [193]:
for x in map(remove_puncutation, states):
    print(x)

   Alabama 
Georgia
Georgia
georgia
FlOrIda
south   carolina
West virginia


In [194]:
# Generator 
def squares(n=10):
    print(f"Generating squares from 1 to {n ** 2}")
    for i in range(1, n + 1):
        yield i ** 2

gen = squares(15)

In [195]:
for x in gen: 
    print(x)

Generating squares from 1 to 225
1
4
9
16
25
36
49
64
81
100
121
144
169
196
225


In [199]:
# itertools module 
import itertools 

def first_letter(x): 
    return x[0]

names = ["Alan", "Adam", "Wes", "Will", "Albert", "Steven"]

for letter, names in itertools.groupby(names, first_letter): 
    print(letter, list(names)) # names is a generator 

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


# file operations

In [1]:
path = "examples/segismundo.txt"

In [2]:
f = open(path, encoding="utf-8")

In [4]:
for line in f: 
    print(line)

hi

fdklajg;ofan

dosagjal

fjkdlsjf


In [5]:
lines = [x.rstrip() for x in open(path, encoding="utf-8")]

In [9]:
lines

['hi', 'fdklajg;ofan', 'dosagjal', 'fjkdlsjf']

In [11]:
f.close()

In [12]:
with open(path, encoding="utf-8") as f: 
    lines = [x.rstrip() for x in f]

In [13]:
lines

['hi', 'fdklajg;ofan', 'dosagjal', 'fjkdlsjf']

In [15]:
path

with open("tmp.txt", mode="w") as handle: 
    handle.writelines(x for x in open(path) if len(x) > 1)

with open("tmp.txt", mode="r") as f:
    line = f.readlines()

lines

['hi', 'fdklajg;ofan', 'dosagjal', 'fjkdlsjf']