# simple pandas vs pandas: Series

In [1]:
from spandas import Series
import pandas as pd
import time
import numpy as np

## Append

In [2]:
# pandas side of the append benchmark: an empty Series to grow and a
# one-element Series that is appended repeatedly.
# The dtype is given explicitly because the implicit dtype of an empty Series
# is version-dependent in pandas; float64 is assumed to be the dtype the
# recorded run used — confirm against the pandas version benchmarked.
s = pd.Series([], dtype="float64")
s_append = pd.Series([1])

In [3]:
# Time 10000 one-at-a-time appends on the pandas Series; each call's result is
# rebound to `s`.
# NOTE(review): `Series.append` is not available in recent pandas releases —
# confirm which pandas version this benchmark targets.
start = time.time()
for i in range(10000):
    s = s.append(s_append)
end = time.time()
time_s = end - start  # elapsed wall-clock seconds for the pandas loop

In [4]:
# simple pandas side of the append benchmark: an empty Series to grow and a
# one-element Series to append.
fs = Series([])
fs_append = Series([1])

In [5]:
# Time 10000 one-at-a-time appends on the simple pandas Series.
# `fs_append` is the one-element Series built in the previous cell; it is not
# rebuilt here so that the timed region contains only the append loop, exactly
# like the pandas measurement.
start = time.time()
for i in range(10000):
    fs = fs.append(fs_append)
end = time.time()
time_fs = end - start  # elapsed wall-clock seconds for the simple pandas loop

In [6]:
# Sanity check: both append loops must have produced Series of equal length.
assert len(s) == len(fs)
# Print both timings, then the pandas / simple pandas ratio (a value above 1
# means simple pandas took less time).
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 1.6256, simple pandas time: 0.16659
boost times: 9.7585


In [7]:
# Time one bulk append on the pandas Series: a list holding 100000 references
# to `s_append` is passed to a single `append` call. Building that list is
# part of the timed region.
start = time.time()
s = s.append([s_append for _ in range(100000)])
end = time.time()
time_s = end - start  # elapsed wall-clock seconds for the pandas bulk append

In [8]:
# Time one bulk append on the simple pandas Series: a list holding 100000
# references to `fs_append` is passed to a single `append` call. Building that
# list is part of the timed region.
start = time.time()
fs = fs.append([fs_append for _ in range(100000)])
end = time.time()
time_fs = end - start  # elapsed wall-clock seconds for the simple pandas bulk append

In [9]:
# Sanity check: both bulk appends must have produced Series of equal length.
assert len(s) == len(fs)
# Print both timings, then the pandas / simple pandas ratio (a value above 1
# means simple pandas took less time).
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 1.0432, simple pandas time: 0.082778
boost times: 12.602


## Apply

### element-wise

In [10]:
# Input for the apply benchmarks: the integers 0 .. 999999 as a pandas Series.
s = pd.Series(data=range(10 ** 6))

In [11]:
# Build the simple pandas Series from the pandas one so both apply benchmarks
# start from the same input.
fs = Series(s)

In [12]:
# Time an element-wise `apply` (add 1 to every value) on the pandas Series;
# the result replaces `s`.
start = time.time()
s = s.apply(lambda x: x+1)
end = time.time()
time_s = end - start  # elapsed wall-clock seconds for pandas

In [13]:
# Time the same element-wise `apply` (add 1 to every value) on the simple
# pandas Series; the result replaces `fs`.
start = time.time()
fs = fs.apply(lambda x: x+1)
end = time.time()
time_fs = end - start  # elapsed wall-clock seconds for simple pandas

In [14]:
# Print both element-wise apply timings, then the pandas / simple pandas ratio
# (a value above 1 means simple pandas took less time).
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.32014, simple pandas time: 0.1865
boost times: 1.7166


### column-wise

In [15]:
# Time 100 repetitions of a mean + std computation through pandas `agg`; each
# result is stored in `tmp` and otherwise unused.
# NOTE(review): confirm `agg` hands the lambda the whole Series directly — if
# it first tries the lambda per element, this is not a like-for-like
# comparison with the simple pandas `type='column'` call.
start = time.time()
for i in range(100):
    tmp = s.agg(lambda x: x.mean() + x.std())
end = time.time()
time_s = end - start  # elapsed wall-clock seconds for pandas

In [16]:
# Time 100 repetitions of the same mean + std computation through simple
# pandas `apply` with type='column'; each result is stored in `tmp` and
# otherwise unused.
# NOTE(review): presumably type='column' passes the whole Series to the
# lambda — verify against the spandas implementation.
start = time.time()
for i in range(100):
    tmp = fs.apply(lambda x: x.mean() + x.std(), type='column')
end = time.time()
time_fs = end - start  # elapsed wall-clock seconds for simple pandas

In [17]:
# Print both column-wise timings, then the pandas / simple pandas ratio
# (a value above 1 means simple pandas took less time).
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 5.1273, simple pandas time: 0.80983
boost times: 6.3313


## Map

To be clear, `map` in simple pandas only supports mapping through a dictionary.

In [18]:
# Input for the map benchmark: the integers 0 .. 99999 as a pandas Series.
s = pd.Series(data=range(10 ** 5))

In [19]:
# Build the simple pandas Series from the pandas one so both map benchmarks
# start from the same input.
fs = Series(s)

In [20]:
# Lookup table for the map benchmark: every key 0 .. 99999 maps to 10001 - key.
# NOTE(review): the offset is based on 10000 while the key range is 100000 —
# confirm the mismatch is intentional (it does not affect the timing).
d = {
    key: 10001 - key
    for key in range(100000)
}

In [21]:
# Time a dictionary `map` on the pandas Series. The mapped result is
# discarded here, whereas the simple pandas cell rebinds it to `fs`.
start = time.time()
s.map(d)
end = time.time()
time_s = end - start  # elapsed wall-clock seconds for pandas

In [22]:
# Time a dictionary `map` on the simple pandas Series; the result replaces `fs`.
start = time.time()
fs = fs.map(d)
end = time.time()
time_fs = end - start  # elapsed wall-clock seconds for simple pandas

In [23]:
# Print both map timings, then the pandas / simple pandas ratio
# (a value above 1 means simple pandas took less time).
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.10076, simple pandas time: 0.056853
boost times: 1.7724


## Iteration

In [24]:
# Input for the iteration benchmarks: 100000 random integers, each 0 or 1
# (the upper bound 2 is exclusive). The generator is not seeded here.
r = np.random.randint(0, 2, size=100000)

In [25]:
# pandas Series over the random array `r`.
s = pd.Series(r)

In [26]:
# simple pandas Series over the same random array `r`.
fs = Series(r)

In [27]:
# Time (label, value) iteration over the pandas Series via `items()`, counting
# the entries whose label and value are both odd.
count = 0
start = time.time()
for i, v in s.items():
    if i % 2 and v % 2:
        count += 1
end = time.time()
time_s = end - start  # elapsed wall-clock seconds for pandas
count  # bare expression: shown as the cell's output

24938

In [28]:
# Time (label, value) iteration over the simple pandas Series via `items()`,
# counting the entries whose label and value are both odd.
count = 0
start = time.time()
for i, v in fs.items():
    if i % 2 and v % 2:
        count += 1
end = time.time()
time_fs = end - start  # elapsed wall-clock seconds for simple pandas
count  # bare expression: shown as the cell's output

24938

In [29]:
# Print both `items()` iteration timings, then the pandas / simple pandas
# ratio (a value above 1 means simple pandas took less time).
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.033939, simple pandas time: 0.042886
boost times: 0.79138


In [30]:
count = 0
start = time.time()
for v in fs:
    pass
end = time.time()
time_s = end - start

In [31]:
start = time.time()
for v in s:
    pass
end = time.time()
time_fs = end - start

In [32]:
# Print the value stored in `time_s` under the pandas label and `time_fs`
# under the simple pandas label, then their ratio `time_s / time_fs`.
print("pandas time: {:.5}, simple pandas time: {:.5}".format(time_s, time_fs))
print("boost times: {:.5}".format(time_s / time_fs))

pandas time: 0.010006, simple pandas time: 0.0099723
boost times: 1.0034
