# Data Structures and Sequences
## Tuple

In [1]:
tup = 4, 5, 6

In [2]:
nested_tup = (4, 5, 6), 7, 8

In [3]:
tuple([4, 0, 2])
tuple("strings")

('s', 't', 'r', 'i', 'n', 'g', 's')

In [4]:
# Unpack a three-element tuple into individual names and show them.
tup = (4, 5, 6)
a, b, c = tup
print(f"a:{a} b:{b} c:{c}")

a:4 b:5 c:6


In [5]:
# Each element of seq is a 3-tuple, so it can be unpacked right in the loop header.
seq = [(1, 2, 3), (4, 5, 6), (6, 7, 8)]
for a, b, c in seq:
    print(f'a={a}, b={b}, c={c}')

a=1, b=2, c=3
a=4, b=5, c=6
a=6, b=7, c=8


In [6]:
# Star-unpacking: the first two items get their own names, the remainder
# is collected into a list.
values = (1, 2, 3, 4, 5)
a, b, *rest = values
print(f"rest = {rest} a = {a} b = {b}")

rest = [3, 4, 5] a = 1 b = 2


In [7]:
rest.count(3)

1

## List

In [8]:
a_list = [2, 3, 7, None]
tup = ('foo', 'bar', 'baz')
b_list = list(tup)
b_list

['foo', 'bar', 'baz']

In [9]:
b_list[1] = "peekaboo"

In [10]:
gen = range(10)
list(gen)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [None]:
# Compare in-place extension against concatenation with `+`.
list1 = list(range(20))
list2 =list(range(50))
# NOTE(review): extend() mutates list1 in place, so list1 keeps growing on
# every timing loop and the `+` benchmark below then runs on that enlarged
# list -- the two timings are not taken on equal inputs.
%timeit list1.extend(list2)
# `+` builds a new list on each loop and leaves both operands untouched.
%timeit list1 + list2

In [None]:
a = [7, 2, 6, 1, 3]
a.sort()
a

In [12]:
b = ['saw', 'small', 'He', 'foxed', 'six']
b.sort(key=len)
b

['He', 'saw', 'six', 'small', 'foxed']

In [17]:
import bisect

In [19]:
# bisect works on an already-sorted list: locate an insertion point, then
# insert while keeping the list sorted.
c = [1, 2, 2, 2, 3, 4, 7]
bisect.bisect_right(c, 2)  # insertion index to the right of the existing 2s
bisect.insort_right(c, 2)  # insert another 2 at that position

## Slicing

In [24]:
seq = [7, 2, 3, 5, 6, 0, 1]
seq[1:5] = [26,12,5]

In [36]:
seq = list("Hello!")
seq[-2:]

['o', '!']

## Built-in Sequence Functions
### enumerate

In [53]:
%%prun
# Manual bookkeeping: keep a running total and count the items by hand.
# The accumulator is called `total` (not `sum`) so the builtin `sum` is not
# shadowed for the rest of the notebook session.
i = 0
total = 0
for value in range(10000000):
    total += value
    i += 1

 

In [54]:
%%prun
# Running total where enumerate() supplies the index instead of a hand-kept
# counter. The accumulator is called `total` (not `sum`) so the builtin `sum`
# stays usable afterwards.
total = 0
for index, value in enumerate(range(10000000)):
    total += value

 

In [57]:
some_list = ['foo', 'bar', 'baz']
# Map every item to its position in the list.
mapping = {item: position for position, item in enumerate(some_list)}
mapping

{'foo': 0, 'bar': 1, 'baz': 2}

In [58]:
sorted([7, 1, 2, 6, 0, 3, 2])

[0, 1, 2, 2, 3, 6, 7]

In [63]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2)
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [64]:
seq3 = [True, False]
list(zip(seq1,seq2, seq3))

[('foo', 'one', True), ('bar', 'two', False)]

In [66]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{} : {} {}'.format(i, a, b))

0 : foo one
1 : bar two
2 : baz three


In [76]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clements'), ('Schilling', 'Curt')]
# zip(*rows) transposes the pairs: one tuple holding every first item and one
# holding every second item, unpacked directly into the two names.
first_name, last_name = zip(*pitchers)

('Ryan', 'Clements', 'Curt')

## Dict

In [86]:
empty_dict = {}
d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}
d1['b'] = [1, 2, 3, 4]
d1[7] = 'an integer'
d1[5] = 'some value'
d1['dummy'] = 'another value'

In [89]:
del d1[5]

In [90]:
d1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 'dummy': 'another value'}

In [93]:
list(d1.keys())

d1.update({'b':'foo', 'c':12})

In [94]:
# Pair 0..3 with a countdown that starts at 45; zip stops at the shorter input.
mapping = dict(zip(range(4), range(45, -1, -1)))

In [95]:
mapping

{0: 45, 1: 44, 2: 43, 3: 42}

### Default values

In [121]:
a = """The first edition of this book was published in 2012, during a time when open source
data analysis libraries for Python (such as pandas) were very new and developing rap‐
idly. In this updated and expanded second edition, I have overhauled the chapters to
account both for incompatible changes and deprecations as well as new features that
have occurred in the last five years. I’ve also added fresh content to introduce tools
that either did not exist in 2012 or had not matured enough to make the first cut.
Finally, I have tried to avoid writing about new or cutting-edge open source projects
that may not have had a chance to mature. I would like readers of this edition to find
that the content is still almost as relevant in 2020 or 2021 as it is in 2017.
The major updates in this second edition include:
• All code, including the Python tutorial, updated for Python 3.6 (the first edition
used Python 2.7)
• Updated Python installation instructions for the Anaconda Python Distribution
and other needed Python packages
• Updates for the latest versions of the pandas library in 2017
• A new chapter on some more advanced pandas tools, and some other usage tips
• A brief introduction to using statsmodels and scikit-learn
I also reorganized a significant portion of the content from the first edition to make
the book more accessible to newcomers.
xiConventions Used in This Book
The following typographical conventions are used in this book:
Italic
Indicates new terms, URLs, email addresses, filenames, and file extensions.
Constant width
Used for program listings, as well as within paragraphs to refer to program ele‐
ments such as variable or function names, databases, data types, environment
variables, statements, and keywords.
Constant width bold
Shows commands or other text that should be typed literally by the user.
Constant width italic
Shows text that should be replaced with user-supplied values or by values deter‐
mined by"""
# Tokenize on any run of whitespace. split() with no argument never yields
# empty strings, which matters because the grouping cells below index word[0].
words = a.split()
# Repeat the word list nine times (presumably to give the profiled loops
# below more work to do).
words = words * 9

In [125]:
%%prun
# Group the words by their first character using a plain dict.
by_letter = {}

for word in words:
    # setdefault returns the list already stored for this letter, or stores
    # and returns the fresh [] when the letter has not been seen yet.
    by_letter.setdefault(word[0], []).append(word)

 

In [123]:
from collections import defaultdict

In [126]:
%%prun
# Group the words by their first character using defaultdict: a missing key
# is created with list() on first access, so no setdefault call is needed.
by_letter = defaultdict(list)

for word in words:
    by_letter[word[0]].append(word)


 

### Valid dict key types

In [131]:
print(hash("I like you"))
print(hash(('I' ,'like', 'you')))

-3744888371751534129
35212948888493069


## Set

In [132]:
set([2,2,3,3,5,8,4,9])

{2, 3, 4, 5, 8, 9}

In [133]:
{1, 2, 5, 8,9,6,5,2,5}

{1, 2, 5, 6, 8, 9}

In [141]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7}
# union() / intersection() are the method spellings of the | and & operators.
print(f"a | b:{a.union(b)} a & b :{a.intersection(b)}")

a | b:{1, 2, 3, 4, 5, 6, 7} a & b :{3, 4, 5}


In [142]:
c = a.copy()
c.remove(5)
print(a,c)

{1, 2, 3, 4, 5} {1, 2, 3, 4}


## List, Set and Dict Comprehensions

In [143]:
strings = ['a', 'as', 'bat', 'car' ,'dove', 'python']
[x.upper() for x in strings if len(x) >2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [146]:
%timeit unique_length = {len(x) for x in strings}
%timeit set(map(len, strings))

732 ns ± 4.31 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
643 ns ± 6.56 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [148]:
loc_mapping = {val:index for index,val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

In [153]:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

['Maria', 'Natalia']

In [155]:
%%prun
# Collect every name that contains the letter 'a' at least twice.
names_of_interest = []
for names in all_data:
    # Named after the letter actually being counted ('a').
    enough_as = [name for name in names if name.count('a') >= 2]
    names_of_interest.extend(enough_as)
names_of_interest

 

In [159]:
result = [name for names in all_data for name in names if name.count('e') >=2]
result

['Steven']

In [163]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
# Turn each tuple into its own list.
res = [list(row) for row in some_tuples]
res

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# Function

In [164]:
def my_function(x, y, z=1.5):
    """Combine the sum of ``x`` and ``y`` with ``z``.

    Returns ``z * (x + y)`` when ``z`` is greater than 1; otherwise returns
    ``z / (x + y)``.
    """
    if not z > 1:
        return z / (x + y)
    return z * (x + y)

## Namespaces, Scope, and Local Functions