In [None]:
# A very convenient way to store large Frames is via ParquetFrameFields. Such fields hide the (complex) read/write operations into Parquet byte streams.
# Note that all column names have to be strings and also note that this doesn't work with Pandas 1.0.4 as there has been a bug introduced in that particular version.

In [20]:
# import the ParquetFrameField
from antarctic.PandasFields import ParquetFrameField

In [21]:
# Connect to a MongoDB database
from mongoengine import *
client = connect(db="test", host="mongomock://localhost")

In [22]:
# Use the ORM mapper of MongoEngine to define a Document
class Portfolio(Document):
    # you can define engine and compression as defined for the pandas.to_parquet function
    prices = ParquetFrameField(engine="pyarrow", compression=None)
    

In [23]:
# Define a portfolio object
p = Portfolio()

In [24]:
%%timeit
# construct a huge DataFrame
import pandas as pd
import numpy as np
import string

def name():
    return "".join(np.random.choice(list(string.ascii_lowercase), size=10))

prices = pd.DataFrame(data=1 + 0.01*np.random.randn(20000, 500), columns=[name() for i in range(0, 500)]).cumprod()

518 ms ± 6.43 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [25]:
prices

Unnamed: 0,olixmnrvxr,trbbvgghlc,nexabpvcbg,blggvrhnph,gfilzqsziv,glkhqqfwxj,cdbsrffnqt,esatqbcqew,mbutadxgnl,xszhoaefna,...,jnikgqtfgi,hyjvhecrsx,gvxfhycggh,atobpxpwhn,bqfhlexnov,dvotctdxjx,jfcuehlmvi,pitxglhzfk,oaloumelch,itgyxxafgp
0,0.997861,0.990291,1.007586,0.996369,1.000486,0.998424,0.995242,1.004060,0.996824,0.998670,...,1.011163,0.991818,1.001102,1.000963,1.016361,1.000743,0.998962,0.990820,1.002864,1.002529
1,1.004266,1.010571,1.013871,0.994398,1.014580,0.987894,0.999717,1.002917,0.995042,1.005253,...,1.028646,0.975233,0.998973,1.013110,1.015061,0.978568,0.994263,1.006790,1.003000,0.979984
2,0.987211,1.001133,0.995392,1.000904,1.004417,0.990163,1.000239,1.012826,1.011821,1.000455,...,1.018743,0.976807,0.991540,1.006323,1.003903,0.983141,0.983267,1.000625,1.019473,0.998555
3,0.995087,1.010517,0.986856,0.992485,0.999201,0.984017,1.008297,1.000237,0.992739,0.991259,...,1.005309,0.972332,0.985056,1.025992,1.012446,0.993658,0.990055,0.994116,1.022371,0.999221
4,0.976546,0.993459,0.991473,0.998603,0.997148,0.984412,1.004245,1.006506,0.999362,0.980264,...,0.997731,0.968920,0.989872,1.022532,1.015661,0.992229,0.989711,1.008478,1.035647,0.994375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,0.182855,0.417875,0.123925,1.979863,0.421102,0.604893,0.333914,0.472060,0.231343,0.050274,...,0.051778,4.109077,0.850670,0.352909,0.093175,0.875495,0.123889,0.361407,0.168860,1.645238
19996,0.181574,0.416786,0.125757,1.976915,0.419207,0.599670,0.339208,0.470045,0.233869,0.050463,...,0.051816,4.044513,0.857199,0.353242,0.093850,0.867361,0.125492,0.356019,0.169309,1.661533
19997,0.183542,0.412593,0.125593,1.964027,0.418713,0.601482,0.344833,0.472784,0.234880,0.049955,...,0.051837,4.119802,0.847821,0.352703,0.093730,0.872055,0.126238,0.354292,0.170214,1.666199
19998,0.182938,0.413578,0.125768,1.997358,0.422034,0.595238,0.345093,0.483224,0.236483,0.049697,...,0.050872,4.190817,0.843213,0.356705,0.096081,0.875561,0.127593,0.349798,0.166350,1.664549


In [26]:
# Measuring time doesn't make a lot of sense here as we are not performing any I/O operations on a disk. 

In [27]:
%%timeit 
p.prices = prices

609 ms ± 77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
%%timeit
p.prices

47.3 ms ± 505 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
p.prices

Unnamed: 0,olixmnrvxr,trbbvgghlc,nexabpvcbg,blggvrhnph,gfilzqsziv,glkhqqfwxj,cdbsrffnqt,esatqbcqew,mbutadxgnl,xszhoaefna,...,jnikgqtfgi,hyjvhecrsx,gvxfhycggh,atobpxpwhn,bqfhlexnov,dvotctdxjx,jfcuehlmvi,pitxglhzfk,oaloumelch,itgyxxafgp
0,0.997861,0.990291,1.007586,0.996369,1.000486,0.998424,0.995242,1.004060,0.996824,0.998670,...,1.011163,0.991818,1.001102,1.000963,1.016361,1.000743,0.998962,0.990820,1.002864,1.002529
1,1.004266,1.010571,1.013871,0.994398,1.014580,0.987894,0.999717,1.002917,0.995042,1.005253,...,1.028646,0.975233,0.998973,1.013110,1.015061,0.978568,0.994263,1.006790,1.003000,0.979984
2,0.987211,1.001133,0.995392,1.000904,1.004417,0.990163,1.000239,1.012826,1.011821,1.000455,...,1.018743,0.976807,0.991540,1.006323,1.003903,0.983141,0.983267,1.000625,1.019473,0.998555
3,0.995087,1.010517,0.986856,0.992485,0.999201,0.984017,1.008297,1.000237,0.992739,0.991259,...,1.005309,0.972332,0.985056,1.025992,1.012446,0.993658,0.990055,0.994116,1.022371,0.999221
4,0.976546,0.993459,0.991473,0.998603,0.997148,0.984412,1.004245,1.006506,0.999362,0.980264,...,0.997731,0.968920,0.989872,1.022532,1.015661,0.992229,0.989711,1.008478,1.035647,0.994375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,0.182855,0.417875,0.123925,1.979863,0.421102,0.604893,0.333914,0.472060,0.231343,0.050274,...,0.051778,4.109077,0.850670,0.352909,0.093175,0.875495,0.123889,0.361407,0.168860,1.645238
19996,0.181574,0.416786,0.125757,1.976915,0.419207,0.599670,0.339208,0.470045,0.233869,0.050463,...,0.051816,4.044513,0.857199,0.353242,0.093850,0.867361,0.125492,0.356019,0.169309,1.661533
19997,0.183542,0.412593,0.125593,1.964027,0.418713,0.601482,0.344833,0.472784,0.234880,0.049955,...,0.051837,4.119802,0.847821,0.352703,0.093730,0.872055,0.126238,0.354292,0.170214,1.666199
19998,0.182938,0.413578,0.125768,1.997358,0.422034,0.595238,0.345093,0.483224,0.236483,0.049697,...,0.050872,4.190817,0.843213,0.356705,0.096081,0.875561,0.127593,0.349798,0.166350,1.664549


In [None]:
# Don't try with the standard FrameField based on a conversion to and from json. 
# It's slow and couldn't cope with frames of that size
