In [7]:
# A convenient way to store large DataFrames is a ParquetFrameField. Such a field hides the (complex) conversion of a frame to and from a Parquet byte stream.
# Note that all column names must be strings. Also note that this does not work with pandas 1.0.4 because of a bug introduced in that particular version.

In [8]:
# import the ParquetFrameField
from antarctic.pandas_fields import ParquetFrameField

In [9]:
# Connect to a MongoDB database
from mongoengine import *
# NOTE(review): the mongomock:// scheme presumably selects an in-memory mock
# instead of a real server - confirm for the installed MongoEngine version.
client = connect(
    db="test",
    host="mongomock://localhost",
)

In [10]:
# Use the ORM mapper of MongoEngine to define a Document
class Portfolio(Document):
    """MongoEngine document with a single Parquet-backed frame field, ``prices``."""

    # engine and compression take the same values as in pandas' to_parquet
    prices = ParquetFrameField(
        engine="pyarrow",
        compression=None,
    )
    

In [11]:
# Define a portfolio object. No field values are passed here; `prices` is assigned in a later cell.
p = Portfolio()

In [12]:
# construct a huge DataFrame
import pandas as pd
import numpy as np
import string
from uuid import uuid4

prices = pd.DataFrame(
    # random gross returns: 1 plus 1% times a standard-normal draw
    data=1 + 0.01 * np.random.randn(20000, 500),
    # one random UUID string per column (column names have to be strings)
    columns=[str(uuid4()) for _ in range(500)],
).cumprod()  # compound the returns along each column to obtain price paths

In [13]:
# Display the frame; the rendering of a frame this large is abbreviated (see the "..." rows and columns below)
prices

Unnamed: 0,0df9b05f-6b58-4c16-b559-66605c155587,d22eee69-b7ff-4ae9-a872-a9c4a60d19d9,a77fe0f1-c4eb-4a69-b6e6-5360a6531d78,7531316f-d195-4e2c-9f0c-b1864ee03f14,988cc006-e927-4b75-a020-4b0493f213df,2ede1b1e-792c-4f6f-8eac-a042857f90a4,027b780a-ec88-4427-88bd-c250a8ad1042,7edb3846-ee72-4fec-8de9-41d3599be896,d5315633-5638-4f97-870f-04f100f5553b,b3577f18-ebf6-4c00-920c-0335bd541c53,...,d2324a85-e959-4b9f-a6e6-9001e93b8f4f,3c51e718-7ce6-4c56-a6eb-3a45c0569387,3a36bc1a-e8dc-400e-8444-a72bba9756af,e4c8a5e3-969b-47e7-85ba-427e11117cfc,32faa91a-feaf-4b77-a8bd-b8d6aacd0a0d,64e4dd83-39b3-4913-ae66-6dc91446e8d5,775d6dfd-117d-48e3-88ff-fb4b0be9f3ac,f3360bdf-7193-401e-8410-ea5d6efaf262,c2e93ef3-607a-40fa-9dd5-a71ce1b5dc43,c3bd3e0b-02f1-4783-b363-e6e8a8af2c09
0,1.024112,0.988765,0.998293,0.997152,0.990916,0.987708,0.998613,1.006951,1.002899,0.994476,...,1.003774,1.026166,0.990639,1.003692,0.997977,1.006784,0.983280,1.014020,0.994893,0.996046
1,1.020615,1.005472,1.025527,0.980547,0.986973,0.985092,1.005748,1.001297,1.005049,0.999441,...,1.002754,1.025130,0.991709,1.016655,0.993261,0.982968,0.970183,1.013295,0.992614,0.997053
2,1.005635,0.998316,1.026607,0.977995,0.993934,0.964096,1.013226,0.993213,1.002411,1.002423,...,1.037940,1.031959,0.991669,1.021133,0.992900,0.985265,0.979549,1.032172,0.980383,0.979442
3,0.999331,0.985370,1.030305,0.985402,0.993686,0.969390,1.027670,0.999595,1.008768,1.018356,...,1.050102,1.037275,0.997987,0.988478,0.992661,0.984575,0.988407,1.036857,0.976256,0.980476
4,0.988080,0.984829,1.040250,0.981991,0.987016,0.980249,1.020180,1.015928,1.021689,1.021455,...,1.052910,1.060618,0.990329,1.013106,0.988745,1.003594,1.002221,1.038254,0.985227,0.974540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,1.939472,0.046600,0.066507,0.139677,1.018513,3.586102,1.158498,0.274928,0.046641,0.060554,...,0.757241,0.153291,0.185331,0.009031,0.510903,1.984960,0.898885,0.569316,0.297602,0.046747
19996,1.936143,0.046201,0.066235,0.137655,1.018210,3.506929,1.145189,0.273666,0.046667,0.060648,...,0.761817,0.152473,0.188133,0.009234,0.508624,2.010018,0.904766,0.579599,0.296945,0.047083
19997,1.923806,0.045105,0.066930,0.137502,1.021422,3.383941,1.137904,0.273703,0.046424,0.059825,...,0.775922,0.153606,0.187739,0.009284,0.511977,2.042069,0.921609,0.579268,0.298236,0.046401
19998,1.898723,0.044929,0.066527,0.135071,1.029988,3.383597,1.123336,0.268511,0.046909,0.059974,...,0.768554,0.153276,0.187818,0.009340,0.507359,2.042479,0.920542,0.577678,0.296669,0.046099


In [14]:
# Timing is not very meaningful here because no disk I/O is performed.

In [15]:
%%timeit 
# Timed statement: assign the frame to the field. It mutates the existing object `p`, and `p.prices` is read back in later cells.
p.prices = prices

607 ms ± 73.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%timeit
# Timed statement: read the frame back from the field
p.prices

72.4 ms ± 12.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
# Display the frame read back from the field; the rendering below matches the one shown for `prices` above
p.prices

Unnamed: 0,0df9b05f-6b58-4c16-b559-66605c155587,d22eee69-b7ff-4ae9-a872-a9c4a60d19d9,a77fe0f1-c4eb-4a69-b6e6-5360a6531d78,7531316f-d195-4e2c-9f0c-b1864ee03f14,988cc006-e927-4b75-a020-4b0493f213df,2ede1b1e-792c-4f6f-8eac-a042857f90a4,027b780a-ec88-4427-88bd-c250a8ad1042,7edb3846-ee72-4fec-8de9-41d3599be896,d5315633-5638-4f97-870f-04f100f5553b,b3577f18-ebf6-4c00-920c-0335bd541c53,...,d2324a85-e959-4b9f-a6e6-9001e93b8f4f,3c51e718-7ce6-4c56-a6eb-3a45c0569387,3a36bc1a-e8dc-400e-8444-a72bba9756af,e4c8a5e3-969b-47e7-85ba-427e11117cfc,32faa91a-feaf-4b77-a8bd-b8d6aacd0a0d,64e4dd83-39b3-4913-ae66-6dc91446e8d5,775d6dfd-117d-48e3-88ff-fb4b0be9f3ac,f3360bdf-7193-401e-8410-ea5d6efaf262,c2e93ef3-607a-40fa-9dd5-a71ce1b5dc43,c3bd3e0b-02f1-4783-b363-e6e8a8af2c09
0,1.024112,0.988765,0.998293,0.997152,0.990916,0.987708,0.998613,1.006951,1.002899,0.994476,...,1.003774,1.026166,0.990639,1.003692,0.997977,1.006784,0.983280,1.014020,0.994893,0.996046
1,1.020615,1.005472,1.025527,0.980547,0.986973,0.985092,1.005748,1.001297,1.005049,0.999441,...,1.002754,1.025130,0.991709,1.016655,0.993261,0.982968,0.970183,1.013295,0.992614,0.997053
2,1.005635,0.998316,1.026607,0.977995,0.993934,0.964096,1.013226,0.993213,1.002411,1.002423,...,1.037940,1.031959,0.991669,1.021133,0.992900,0.985265,0.979549,1.032172,0.980383,0.979442
3,0.999331,0.985370,1.030305,0.985402,0.993686,0.969390,1.027670,0.999595,1.008768,1.018356,...,1.050102,1.037275,0.997987,0.988478,0.992661,0.984575,0.988407,1.036857,0.976256,0.980476
4,0.988080,0.984829,1.040250,0.981991,0.987016,0.980249,1.020180,1.015928,1.021689,1.021455,...,1.052910,1.060618,0.990329,1.013106,0.988745,1.003594,1.002221,1.038254,0.985227,0.974540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,1.939472,0.046600,0.066507,0.139677,1.018513,3.586102,1.158498,0.274928,0.046641,0.060554,...,0.757241,0.153291,0.185331,0.009031,0.510903,1.984960,0.898885,0.569316,0.297602,0.046747
19996,1.936143,0.046201,0.066235,0.137655,1.018210,3.506929,1.145189,0.273666,0.046667,0.060648,...,0.761817,0.152473,0.188133,0.009234,0.508624,2.010018,0.904766,0.579599,0.296945,0.047083
19997,1.923806,0.045105,0.066930,0.137502,1.021422,3.383941,1.137904,0.273703,0.046424,0.059825,...,0.775922,0.153606,0.187739,0.009284,0.511977,2.042069,0.921609,0.579268,0.298236,0.046401
19998,1.898723,0.044929,0.066527,0.135071,1.029988,3.383597,1.123336,0.268511,0.046909,0.059974,...,0.768554,0.153276,0.187818,0.009340,0.507359,2.042479,0.920542,0.577678,0.296669,0.046099


In [18]:
# Don't try this with the standard FrameField, which is based on a conversion to and from JSON.
# It is slow and cannot cope with frames of this size.
