In [8]:
numbers = [1,2,3,4,5,6,7,8,9,10]

In [9]:
#Non-Pythonic
doubled_numbers = []

for i in range(len(numbers)):
    doubled_numbers.append(numbers[i]*2)

doubled_numbers

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

In [10]:
# Pythonic
doubled_numbers = [x * 2 for x in numbers]
doubled_numbers

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

In [11]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


Built in is better than reinvent the wheel

In [12]:
#Manual
numbers = [1,2,3,4,5,6,7,8,9,10]
numbers


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [13]:
#Built in
list(range(1,11))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [14]:
#easy index
list(enumerate(['a','b','c']))

[(0, 'a'), (1, 'b'), (2, 'c')]

In [15]:
#map(function,iterable)
nums = [1.5,3.5,3.6,7.5,5.2]
list(map(round,nums))

[2, 4, 4, 8, 5]

In [16]:
#map lambda
nums = [1.5,3.5,3.6,7.5,5.2]
list(map(lambda x: x**2,nums))

[2.25, 12.25, 12.96, 56.25, 27.040000000000003]

Numpy

In [17]:
import numpy as np


In [18]:
numb = np.array(range(5))
numb

array([0, 1, 2, 3, 4])

In [19]:
numb.dtype

dtype('int64')

In [20]:
#broadcasting
numb ** 2

array([ 0,  1,  4,  9, 16])

In [21]:
# not so efficient
[num ** 2 for num in range(5)]

[0, 1, 4, 9, 16]

Matrix numpy vs lists

In [22]:
matrix = [list(range(2)),list(range(3,5))]
matrix

[[0, 1], [3, 4]]

In [23]:
matrix[0][1]

1

In [24]:
# fist column
[row[0] for row in matrix]

[0, 3]

In [25]:
npmatrix = np.array(matrix)

In [26]:
npmatrix[0,1]

1

In [27]:
npmatrix[:,0]

array([0, 3])

Conditional numpy

In [28]:
nums = [-2,-1,0,1,2]
nums_np = np.array(nums)
nums_np[nums_np > 0]

array([1, 2])

In [29]:
nums = [-2,-1,0,1,2]
[num for num in nums if num > 0]

[1, 2]

Measure time

In [30]:
import numpy as np

In [31]:
#%timeit -r number of runs -n number of loops
%timeit -r2 -n10 rands_nums = np.random.rand(1000)

49.3 µs ± 2.83 µs per loop (mean ± std. dev. of 2 runs, 10 loops each)


In [32]:
%%timeit -o 
nums = []
for x in range(10):
    nums.append(x)


359 ns ± 3.28 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


<TimeitResult : 359 ns ± 3.28 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)>

In [33]:
time1 = _

In [34]:
time1

<TimeitResult : 359 ns ± 3.28 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)>

In [20]:
time2 = %timeit -o  nums2 = [x for x in range(10)]

319 ns ± 2.47 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [23]:
diff = (time1.average - time2.average) * (10**9)
if diff > 0:
    print('time2 better than time1 by {}'.format(diff))
else:
    print('time1 better than time2 by {}'.format(diff))

time2 better than time1 by 91.10770785710953


Code profiling

In [24]:
%load_ext line_profiler

In [26]:
def recursive_method(n):
    if n == 1:
        return 1 
    else:
        return n * recursive_method(n-1)

In [28]:
%lprun -f recursive_method recursive_method(100)

Timer unit: 1e-06 s

Total time: 5.8e-05 s
File: /tmp/ipykernel_248/3574158587.py
Function: recursive_method at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def recursive_method(n):
     2       100         18.0      0.2     31.0      if n == 1:
     3         1          0.0      0.0      0.0          return 1 
     4                                               else:
     5        99         40.0      0.4     69.0          return n * recursive_method(n-1)

Combining

In [29]:
names =['Bulbasaur','Chamander','Squirtle']
hps = [45,39,44]

In [32]:
combined =[*zip(names,hps)]
combined

[('Bulbasaur', 45), ('Chamander', 39), ('Squirtle', 44)]

Counter

In [28]:
import random
types = ['Rock','Dragon','Ghost','Ice','Poison','Grass','Flying','Eletric','Fairy','Steel','Psychic','Bug','Dark','Fighting','Ground','Fire','Normal','Water']

In [29]:
types_list = [random.choice(types) for x in range(720)]

In [3]:
from collections import Counter

In [9]:
type_count = Counter(types_list)
type_count

Counter({'Dark': 52,
         'Flying': 35,
         'Dragon': 40,
         'Fire': 35,
         'Ghost': 33,
         'Psychic': 36,
         'Fairy': 46,
         'Rock': 48,
         'Poison': 35,
         'Ground': 42,
         'Grass': 38,
         'Normal': 45,
         'Steel': 36,
         'Ice': 42,
         'Fighting': 39,
         'Eletric': 48,
         'Bug': 30,
         'Water': 40})

Combination generators

In [10]:
from itertools import combinations

In [15]:
types = ['Rock','Dragon','Ghost','Ice']

In [18]:
combos = combinations(types,2)
combos = [*combos]
combos

[('Rock', 'Dragon'),
 ('Rock', 'Ghost'),
 ('Rock', 'Ice'),
 ('Dragon', 'Ghost'),
 ('Dragon', 'Ice'),
 ('Ghost', 'Ice')]

Sets

In [19]:
typeA = ['Rock','Dragon','Ghost','Ice']
typeB = ['Ice','Poison','Grass','Flying']

In [20]:
setA = set(typeA)
setB = set(typeB)

In [21]:
setA.intersection(setB)

{'Ice'}

In [22]:
setA.difference(setB)

{'Dragon', 'Ghost', 'Rock'}

In [23]:
setA.union(setB)

{'Dragon', 'Flying', 'Ghost', 'Grass', 'Ice', 'Poison', 'Rock'}

In [24]:
setA.symmetric_difference(setB)

{'Dragon', 'Flying', 'Ghost', 'Grass', 'Poison', 'Rock'}

In [36]:
types_tuple = tuple(types_list)
types_set = set(types_list)

In [37]:
%timeit 'Grass' in types_list

34.2 ns ± 0.0644 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [38]:
%timeit 'Grass' in types_tuple

33.8 ns ± 0.25 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [39]:
%timeit 'Grass' in types_set

15.2 ns ± 0.18 ns per loop (mean ± std. dev. of 7 runs, 100,000,000 loops each)


In [41]:
unique = set(type_count)
unique

{'Bug',
 'Dark',
 'Dragon',
 'Eletric',
 'Fairy',
 'Fighting',
 'Fire',
 'Flying',
 'Ghost',
 'Grass',
 'Ground',
 'Ice',
 'Normal',
 'Poison',
 'Psychic',
 'Rock',
 'Steel',
 'Water'}


Loops

In [44]:
hp = [random.choice(range(10,255)) for x in range(898)]
spd = [random.choice(range(5,200)) for x in range(898)]
dfs = [random.choice(range(5,230)) for x in range(898)]
att = [random.choice(range(5,190)) for x in range(898)]


In [49]:
combined =[*zip(hp,spd,dfs,att)]
pokestatus = [*map(list,combined)]

In [54]:
%timeit total = [sum(row) for row in pokestatus]

45.9 µs ± 207 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [52]:
%timeit total = [*map(sum,pokestatus)]

35 µs ± 205 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [53]:
import numpy as np

np_pokestatus = np.array(pokestatus)

In [62]:
%timeit total = np_pokestatus.sum(axis=1)


13.6 µs ± 136 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


Better for

In [2]:
import numpy as np

names = ['Absol','Aron','Jynx','Natu','Onix']
attacks = np.array([130,70,50,50,45])

In [37]:
#bad approach
for pokemon, attack in zip(names,attacks):
    total_attack_avg = attacks.mean()
    if attack > total_attack_avg:
        print("{}'s attack {} > avarage {}!"
        .format(pokemon,attack,total_attack_avg))

Absol's attack 130 > avarage 69.0!
Aron's attack 70 > avarage 69.0!


In [4]:
#better approach
total_attack_avg = attacks.mean()
for pokemon, attack in zip(names,attacks):
    if attack > total_attack_avg:
        print("{}'s attack {} > avarage {}!"
        .format(pokemon,attack,total_attack_avg))

Absol's attack 130 > avarage 69.0!
Aron's attack 70 > avarage 69.0!


In [5]:
names = ['Absol','Aron','Jynx','Natu','Onix','Pikachu','Squirtle','Articuno']
legend_status = [False,False,False,False,False,False,False,True]
generations = [3,4,1,2,1,1,1,1]

In [38]:
#conversion bad approach
poke_data = []
for poke_tuple in zip(names, legend_status, generations):
    poke_list = list(poke_tuple)
    poke_data.append(poke_list)

print(poke_data)

[['Absol', False, 3], ['Aron', False, 4], ['Jynx', False, 1], ['Natu', False, 2], ['Onix', False, 1], ['Pikachu', False, 1], ['Squirtle', False, 1], ['Articuno', True, 1]]


In [39]:
#conversion better approach
poke_data = []
for poke_tuple in zip(names, legend_status, generations):
    poke_data.append(poke_tuple)
poke_data = [*map(list,poke_data)]
print(poke_data)

[['Absol', False, 3], ['Aron', False, 4], ['Jynx', False, 1], ['Natu', False, 2], ['Onix', False, 1], ['Pikachu', False, 1], ['Squirtle', False, 1], ['Articuno', True, 1]]


Dataframes

In [40]:
import pandas as pd

In [41]:
baseball_df = pd.read_csv('https://assets.datacamp.com/production/repositories/3581/datasets/779033fb8fb5021aee9ff46253980abcbc5851f3/baseball_stats.csv')
baseball_df.head()

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424


In [50]:
baseball_df['win_perc'] = baseball_df.W / baseball_df.G
baseball_df['win_perc'] = baseball_df['win_perc'].round(2)

In [51]:
baseball_df.head()

Unnamed: 0,Team,League,Year,RS,RA,W,OBP,SLG,BA,Playoffs,RankSeason,RankPlayoffs,G,OOBP,OSLG,win_perc
0,ARI,NL,2012,734,688,81,0.328,0.418,0.259,0,,,162,0.317,0.415,0.5
1,ATL,NL,2012,700,600,94,0.32,0.389,0.247,1,4.0,5.0,162,0.306,0.378,0.58
2,BAL,AL,2012,712,705,93,0.311,0.417,0.247,1,5.0,4.0,162,0.315,0.403,0.57
3,BOS,AL,2012,734,806,69,0.315,0.415,0.26,0,,,162,0.331,0.428,0.43
4,CHC,NL,2012,613,759,61,0.302,0.378,0.24,0,,,162,0.335,0.424,0.38


In [49]:
#use iterrows() instead of iloc in for
win_perc_list = []

for i, row in baseball_df.iterrows():
    wins = row['W']
    games_played = row['G']
    win_perc = wins/games_played
    win_perc_list.append(win_perc)
baseball_df['win_perc'] = win_perc_list

In [52]:
teams_wins_df = baseball_df[['Team','Year','W']]
teams_wins_df.head()

Unnamed: 0,Team,Year,W
0,ARI,2012,81
1,ATL,2012,94
2,BAL,2012,93
3,BOS,2012,69
4,CHC,2012,61


In [61]:
%%timeit
for row_tuple in teams_wins_df.iterrows():
    type(row_tuple[1]) #pandas.Series

15.8 ms ± 226 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [87]:
%%timeit
#itertuples is even better than iterrows
for row_tuple in teams_wins_df.itertuples():
    type(row_tuple[1]) #named_tuple Pandas(Index=0, Team='ARI', Year=2012, W=81)


561 µs ± 6.29 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


Just apply it

In [89]:
%%timeit
run_diffs_iterrows = []
for i, row in baseball_df.iterrows():
    run_diff = row['RS'] - row['RA']
    run_diffs_iterrows.append(run_diff)
baseball_df['run_diffs'] = run_diffs_iterrows

24.4 ms ± 567 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [90]:
%%timeit
run_diffs_itertuples= []
for i, row in enumerate(baseball_df.itertuples(),1):
    run_diff = row.RS - row.RA
    run_diffs_itertuples.append(run_diff)
baseball_df['run_diffs'] = run_diffs_itertuples

1.92 ms ± 19 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [91]:
%%timeit
#apply is the same as map, it is a better option then iterrows
# 0 columns, 1 for rows
baseball_df['run_diffs'] =  baseball_df.apply( lambda row: row['RS'] - row['RA'], axis=1)

7.5 ms ± 14.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


Broadcast is the best

In [92]:
%%timeit
baseball_df['run_diffs'] = baseball_df.RS - baseball_df.RA

91.6 µs ± 322 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
