In [1]:
import os
import pandas as pd
import numpy as np
from uuid import uuid4
import pickle

In [2]:
# Benchmark fixture: an n-by-m frame of standard-normal floats whose column
# labels are random UUID strings.
SEED = 42  # fixed seed so every kernel run benchmarks exactly the same values
n, m = 1000, 20
frame = pd.DataFrame(
    # A private RandomState keeps the draw reproducible without touching
    # NumPy's global random state.
    data=np.random.RandomState(SEED).randn(n, m),
    # Column labels stay random (uuid4); only the numeric payload is seeded.
    columns=[str(uuid4()) for _ in range(m)],
)

## Attempts without Mongo

In [3]:
%%timeit
frame.to_parquet("test.parquet", engine="pyarrow")
pd.read_parquet("test.parquet", engine="pyarrow")

9.46 ms ± 127 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
%%timeit
frame.to_pickle("test.pickle")
pd.read_pickle("test.pickle")

1.45 ms ± 182 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Connecting to Mongo

In [5]:
# Import only the names this notebook uses rather than `from mongoengine import *`,
# so it stays obvious where `connect` and `Document` come from.
from mongoengine import Document, connect

# Keyword arguments handed to mongoengine.connect: database 'mongo' on host 'mongo'.
MONGODB_SETTINGS = {'db': 'mongo', 'host': 'mongo'}

# Kept as the last expression so the returned client is shown as the cell output.
connect(**MONGODB_SETTINGS)

MongoClient(host=['mongo:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary())

## Using Parquet: convert the frame to Parquet and back

In [6]:
from antarctic.PandasFields import ParquetFrameField

class Maffay(Document):
    """Document with a single ``frame`` field declared as a ``ParquetFrameField``."""

    # NOTE(review): presumably the field stores a pandas DataFrame in Parquet
    # form — confirm against antarctic.PandasFields.
    frame = ParquetFrameField()

In [7]:
%%timeit
m=Maffay(frame=frame).save()
m.frame

9.29 ms ± 513 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Using Pickle: convert the frame with pickle and back

In [8]:
from antarctic.PandasFields import PickleFrameField

class Falco(Document):
    """Document with a single ``frame`` field declared as a ``PickleFrameField``."""

    # NOTE(review): presumably the field stores a pandas DataFrame in pickled
    # form — confirm against antarctic.PandasFields.
    frame = PickleFrameField()

In [9]:
%%timeit
f = Falco(frame=frame).save()
f.frame

2.62 ms ± 476 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Using Arctic

In [10]:
from arctic import Arctic

# Open an Arctic store on the MongoDB instance reachable under the host name "mongo".
store = Arctic("mongo")

# Initialise the "test" library; no library type is given, so the default
# (VersionStore) is used.
store.initialize_library("test")

# Fetch a handle to that library for the benchmark cell that follows.
library = store["test"]

  from pandas.util.testing import assert_frame_equal
  from pandas import DataFrame, Series, Panel
Library created, but couldn't enable sharding: no such command: 'enablesharding'. This is OK if you're not 'admin'


In [11]:
%%timeit

# Store the data in the library
library.write('test', frame)

# Reading the data
item = library.read('test').data

12.6 ms ± 1.68 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
