# simple pandas vs pandas: Series

In [1]:
from spandas import Series
import pandas as pd
import time
import numpy as np

## Append

In [2]:
# Pandas side of the append benchmark: an empty accumulator and the
# one-element Series that is appended to it over and over.
s, s_append = pd.Series([]), pd.Series([1])

In [3]:
# Time 10,000 one-at-a-time appends with pandas.
# NOTE: Series.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so this cell needs pandas < 2.0 to run.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
for i in range(10000):
    s = s.append(s_append)
end = time.perf_counter()
time_s = end - start

In [4]:
# Simple-pandas side of the append benchmark: an empty accumulator and the
# one-element Series that is appended to it over and over.
fs, fs_append = Series([]), Series([1])

In [5]:
# Time 10,000 one-at-a-time appends with simple pandas.
# fs_append is already built in the previous cell; constructing it again after
# the timer has started would charge a Series() constructor call to simple
# pandas that the pandas loop does not pay for, so it is not rebuilt here.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
for i in range(10000):
    fs = fs.append(fs_append)
end = time.perf_counter()
time_fs = end - start

In [6]:
# Sanity check: both implementations must hold the same number of elements
# after the appends.
assert len(s) == len(fs)
# Report both timings (seconds) and the ratio pandas / simple pandas;
# a ratio above 1 means simple pandas was faster.
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 1.5359, simple pandas time: 0.16553
boost times: 9.2786


In [7]:
# Time a single pandas append of 100,000 one-element Series passed as a list.
# NOTE: Series.append was removed in pandas 2.0; this cell needs pandas < 2.0.
# The list holds 100,000 references to the same object, so list repetition
# builds exactly what the comprehension did with less overhead inside the
# timed region.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
s = s.append([s_append] * 100000)
end = time.perf_counter()
time_s = end - start

In [8]:
# Time a single simple-pandas append of 100,000 one-element Series passed as
# a list.
# The list holds 100,000 references to the same object, so list repetition
# builds exactly what the comprehension did with less overhead inside the
# timed region.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
fs = fs.append([fs_append] * 100000)
end = time.perf_counter()
time_fs = end - start

In [9]:
# Sanity check: both implementations must hold the same number of elements
# after the bulk append.
assert len(s) == len(fs)
# Report both timings (seconds) and the ratio pandas / simple pandas;
# a ratio above 1 means simple pandas was faster.
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 1.0323, simple pandas time: 0.079785
boost times: 12.938


## Apply

In [10]:
# One million consecutive integers (0 .. 999999) as input for the apply benchmark.
s = pd.Series(range(1_000_000))

In [11]:
# Build the simple-pandas counterpart from the pandas Series `s`, so both
# apply benchmarks start from the same data.
fs = Series(s)

In [12]:
# Time an element-wise increment through pandas Series.apply.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
s = s.apply(lambda x: x+1)
end = time.perf_counter()
time_s = end - start

In [13]:
# Time the same element-wise increment through the simple-pandas Series.apply.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
fs = fs.apply(lambda x: x+1)
end = time.perf_counter()
time_fs = end - start

In [14]:
# Report both apply timings (seconds) and the ratio pandas / simple pandas;
# a ratio above 1 means simple pandas was faster.
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.32314, simple pandas time: 0.1845
boost times: 1.7514


## Replace

In [15]:
# Ten thousand consecutive integers (0 .. 9999) as input for the replace benchmark.
s = pd.Series(range(10_000))

In [16]:
# Build the simple-pandas counterpart from the pandas Series `s`, so both
# replace benchmarks start from the same data.
fs = Series(s)

In [17]:
# Replacement table covering every value in the Series: key -> 10001 - key
# (0 -> 10001, 1 -> 10000, ..., 9999 -> 2).
d = {key: 10001 - key for key in range(10000)}

In [18]:
# Time a dict-driven, in-place replace of every value with pandas.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
s.replace(d, inplace=True)
end = time.perf_counter()
time_s = end - start

In [19]:
# Time the same dict-driven replace with simple pandas; here the result is
# rebound to `fs` rather than applied in place.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
start = time.perf_counter()
fs = fs.replace(d)
end = time.perf_counter()
time_fs = end - start

In [20]:
# Report both replace timings (seconds) and the ratio pandas / simple pandas;
# a ratio above 1 means simple pandas was faster.
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.7121, simple pandas time: 0.0069799
boost times: 102.02


## Iteration

In [21]:
# 100,000 random integers drawn from {0, 1} (upper bound 2 is exclusive),
# shared by both iteration benchmarks.
r = np.random.randint(0, 2, size=100_000)

In [22]:
# Pandas Series over the random 0/1 array `r`.
s = pd.Series(data=r)

In [23]:
# Simple-pandas Series built from the same random array `r`, so both
# iteration benchmarks see identical values.
fs = Series(r)

In [24]:
# Time iteration over (index, value) pairs with pandas, counting the entries
# where both the index and the value are odd.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
count = 0
start = time.perf_counter()
for i, v in s.items():
    if i % 2 and v % 2:
        count += 1
end = time.perf_counter()
time_s = end - start
# Display the count so it can be compared with the simple-pandas run.
count

25109

In [25]:
# Time iteration over (index, value) pairs with simple pandas, counting the
# entries where both the index and the value are odd.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is a wall clock that can be coarse or adjusted.
count = 0
start = time.perf_counter()
for i, v in fs.items():
    if i % 2 and v % 2:
        count += 1
end = time.perf_counter()
time_fs = end - start
# Display the count so it can be compared with the pandas run.
count

25109

In [26]:
# Report both items() iteration timings (seconds) and the ratio
# pandas / simple pandas; a ratio above 1 means simple pandas was faster.
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.031945, simple pandas time: 0.041888
boost times: 0.76264


In [27]:
count = 0
start = time.time()
for v in fs:
    pass
end = time.time()
time_s = end - start

In [28]:
start = time.time()
for v in s:
    pass
end = time.time()
time_fs = end - start

In [29]:
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.0089755, simple pandas time: 0.0099735
boost times: 0.89993
