In [1]:
import numpy as np
import pandas as pd
import time

In [2]:
# Load the dataset from heart_data.csv (path relative to the working directory)
# and preview its first rows.
df = pd.read_csv(filepath_or_buffer='heart_data.csv')

df.head(n=5)

Unnamed: 0,index,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0


In [3]:
# Body-mass index, rounded to two decimals. The 10000 factor rescales the
# squared height (presumably recorded in cm, with weight in kg — the sample
# rows are consistent with that) so the result is in kg/m².
df['imc'] = (10000 * df['weight'] / (df['height'] * df['height'])).round(2)

In [4]:
# Preview the first rows again to check the newly added imc column.
df.head(n=5)

Unnamed: 0,index,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio,imc
0,0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0,21.97
1,1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1,34.93
2,2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1,23.51
3,3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1,28.71
4,4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0,23.01


# Vetorização Pandas

In [7]:
%%timeit

# Without NumPy: classify one value at a time with a plain Python function.
def classify_imc(imc):
    """Return the weight-category label for a single BMI value.

    'baixo_peso' below 18.5, 'normal' in [18.5, 25), 'sobrepeso' in [25, 30)
    and 'obesidade' from 30 upward. A value for which every comparison is
    False (e.g. NaN) yields None.
    """
    if imc < 18.5:
        return 'baixo_peso'
    if 18.5 <= imc < 25:
        return 'normal'
    if 25 <= imc < 30:
        return 'sobrepeso'
    if imc >= 30:
        return 'obesidade'
    # No range matched (NaN compares False against every bound).
    return None

# Apply the scalar classifier to every imc value (one Python call per row).
df['class_imc'] = df['imc'].apply(classify_imc)

105 ms ± 9.09 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
%%timeit
# With NumPy: build one boolean mask per BMI range and let np.select pick, for
# each row, the label of the first mask that is True.
imc = df['imc']
condicoes = [
    imc < 18.5,
    (imc >= 18.5) & (imc < 25),
    (imc >= 25) & (imc < 30),
    imc >= 30,
]

classificacao = ['baixo_peso', 'normal', 'sobrepeso', 'obesidade']

# Rows matching no mask (e.g. NaN imc) get None, the same placeholder the
# row-by-row classifier returns. A float default such as np.nan cannot share a
# dtype with the string labels: recent NumPy releases raise TypeError, older
# ones silently store it as the text 'nan'.
df['class_imc2'] = np.select(condicoes, classificacao, default=None)

59.3 ms ± 2.48 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# List Comprehension

In [9]:
# Minimal list-comprehension example: double every element of the list.
[x * 2 for x in [1, 2, 3]]

[2, 4, 6]

In [16]:
# Baseline: build the list 1..iterations with an explicit loop and append().
# time.perf_counter() is the clock meant for timing short durations: it is
# monotonic and uses the highest available resolution, whereas time.time() is
# a wall clock that can jump when the system time is adjusted.
# (`time` is already imported in the first cell.)
iterations = 1000000
start = time.perf_counter()
mylist = []
for i in range(iterations):
    mylist.append(i+1)
end = time.perf_counter()
print(end - start)

0.4758877754211426


In [17]:
# Build the list 1..iterations with a list comprehension.
# Timed with perf_counter(), the monotonic high-resolution clock intended for
# measuring short durations (time.time() is a wall clock).
start = time.perf_counter()
mylist = [i+1 for i in range(iterations)]
end = time.perf_counter()
print(end - start)

0.22679686546325684


- List comprehensions are faster than `for` loops that call `append()` when the goal is to build a list.

In [18]:
# Evaluate i+1 for every i with a plain loop and discard each result.
# Timed with perf_counter(), the monotonic high-resolution clock intended for
# measuring short durations (time.time() is a wall clock).
start = time.perf_counter()
for i in range(iterations):
    i+1
end = time.perf_counter()
print(end - start)

0.23300647735595703


In [19]:
# Evaluate i+1 for every i inside a list comprehension whose resulting list is
# never bound to a name, so the list is built and then thrown away.
# Timed with perf_counter(), the monotonic high-resolution clock intended for
# measuring short durations (time.time() is a wall clock).
start = time.perf_counter()
[i+1 for i in range(iterations)]
end = time.perf_counter()
print(end - start)

0.32207226753234863


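- When the results are discarded (the code runs only for its side effects), a plain `for` loop is faster than a list comprehension, because the comprehension still builds a list that is then thrown away.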

In [20]:
# Build the list directly from range() with the list constructor, without a
# Python-level loop body.
# NOTE: this yields 0..iterations-1, whereas the loop and comprehension cells
# build 1..iterations; the element count is the same.
# Timed with perf_counter(), the monotonic high-resolution clock intended for
# measuring short durations (time.time() is a wall clock).
start = time.perf_counter()
mylist = list(range(iterations))
end = time.perf_counter()
print(end - start)

0.07996010780334473


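- Building the list directly with built-ins (`list(range(...))`) is faster than both the loop and the comprehension, because no Python-level loop body runs for each element; the same idea is behind vectorized array computations.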

Reference: https://towardsdatascience.com/list-comprehensions-vs-for-loops-it-is-not-what-you-think-34071d4d8207