In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import pymongo
from datetime import datetime

In [2]:
# Load the iris dataset; the code below reads its data, target,
# target_names and feature_names attributes.
raw = load_iris()

# 'sepal length (cm)' -> 'sepal_length': snake_case the name and drop the unit.
var_names = [val.replace(' ', '_').replace('_(cm)', '') for val in raw.feature_names]

# Build the frame straight from the numeric matrix so the measurements keep
# their float dtype. Appending the string labels to the matrix first would
# coerce every value to text and require pd.to_numeric(errors='ignore') to
# undo it, and that option is deprecated in recent pandas releases.
iris = pd.DataFrame(raw.data, columns=var_names)

# Translate each class code in raw.target into its name from raw.target_names.
iris['target'] = raw.target_names[raw.target]

# A single naive UTC timestamp shared by every row of this load.
# NOTE(review): datetime.utcnow() is deprecated from Python 3.12 on; switching
# to an aware datetime would change what is stored/displayed, so it is kept.
iris['dttm'] = datetime.utcnow()

# One plain dict per row, ready to be passed to insert_many().
documents = iris.to_dict('records')

In [3]:
class Iris_to_Mongo(object):
    """Small helper that binds a pymongo client to one database and collection.

    Parameters
    ----------
    db : str
        Name of the database to open on the client.
    coll : str
        Name of the collection to open inside ``db``.
    **client_kwargs
        Optional keyword arguments passed through unchanged to
        ``pymongo.MongoClient``. When none are given the client is created
        with ``pymongo.MongoClient()``, i.e. the library defaults.

    Attributes
    ----------
    client
        The ``pymongo.MongoClient`` instance.
    db
        ``client[db]``.
    coll
        ``db[coll]``.
    names : dict
        ``{'database': db, 'collection': coll}``, kept so the names can be
        reported or reused later (see ``remove_db``).

    The object can also be used as a context manager; leaving the ``with``
    block calls ``close_client()`` even when the body raised.
    """

    def __init__(self, db, coll, **client_kwargs):
        self.client = pymongo.MongoClient(**client_kwargs)
        self.db = self.client[db]
        self.coll = self.db[coll]
        self.names = dict(database=db, collection=coll)

    def __enter__(self):
        """Return the helper itself so ``with Iris_to_Mongo(...) as op:`` works."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the client on exit; exceptions from the body are not suppressed."""
        self.close_client()
        return False

    def remove_db(self):
        """Drop the database this helper was created for and print a confirmation."""
        db_name = self.names['database']
        self.client.drop_database(db_name)
        print("Database '{0}' Removed!".format(db_name))

    def close_client(self):
        """Close the underlying client and print a confirmation."""
        self.client.close()
        print('Client Closed Successfully!')

In [4]:
# Open the helper on a throw-away database/collection used only by this demo.
op1 = Iris_to_Mongo(db='Iris_test', coll='Iris_col')

In [5]:
# Bulk-insert every iris record into the collection; the returned
# InsertManyResult is displayed as the cell output.
# NOTE(review): pymongo is understood to add an `_id` key to each dict in
# place, so re-running this cell with the same `documents` list would
# presumably fail with duplicate keys -- confirm before re-executing.
op1.coll.insert_many(documents)

<pymongo.results.InsertManyResult at 0x7f3a44418888>

In [6]:
# Fetch a single stored document to inspect its shape: it comes back as a
# dict holding the inserted fields plus an `_id`.
op1.coll.find_one()

{'_id': ObjectId('5cc7fb756d67b8285f150a84'),
 'sepal_length': 5.1,
 'sepal_width': 3.5,
 'petal_length': 1.4,
 'petal_width': 0.2,
 'target': 'setosa',
 'dttm': datetime.datetime(2019, 4, 30, 7, 38, 27, 738000)}

In [7]:
# find() does not run the query right away: it returns a
# pymongo.cursor.Cursor that behaves like an iterator.
subsample = op1.coll.find({'target': 'setosa'})

# Draining the cursor yields one dict per document, i.e. one row each.
output = pd.DataFrame([*subsample])
print('The iris type(s) in the subsample :', output['target'].unique())
output.head()

The iris type(s) in the subsample : ['setosa']


Unnamed: 0,_id,dttm,petal_length,petal_width,sepal_length,sepal_width,target
0,5cc7fb756d67b8285f150a84,2019-04-30 07:38:27.738,1.4,0.2,5.1,3.5,setosa
1,5cc7fb756d67b8285f150a85,2019-04-30 07:38:27.738,1.4,0.2,4.9,3.0,setosa
2,5cc7fb756d67b8285f150a86,2019-04-30 07:38:27.738,1.3,0.2,4.7,3.2,setosa
3,5cc7fb756d67b8285f150a87,2019-04-30 07:38:27.738,1.5,0.2,4.6,3.1,setosa
4,5cc7fb756d67b8285f150a88,2019-04-30 07:38:27.738,1.4,0.2,5.0,3.6,setosa


In [8]:
# Another flavour of find(): a numeric comparison ($gte) combined with a
# regular-expression match ($regex) on the species name.
subsample2 = op1.coll.find({
    'petal_length': {'$gte': 6.6},
    'target': {'$regex': '^v'},
})

output2 = pd.DataFrame([*subsample2])
print('The iris type(s) in the subsample :', output2['target'].unique())
output2

The iris type(s) in the subsample : ['virginica']


Unnamed: 0,_id,dttm,petal_length,petal_width,sepal_length,sepal_width,target
0,5cc7fb756d67b8285f150aed,2019-04-30 07:38:27.738,6.6,2.1,7.6,3.0,virginica
1,5cc7fb756d67b8285f150af9,2019-04-30 07:38:27.738,6.7,2.2,7.7,3.8,virginica
2,5cc7fb756d67b8285f150afa,2019-04-30 07:38:27.738,6.9,2.3,7.7,2.6,virginica
3,5cc7fb756d67b8285f150afe,2019-04-30 07:38:27.738,6.7,2.0,7.7,2.8,virginica


In [9]:
# Read every document back so the result can be compared with the
# original dataframe.
sample = op1.coll.find()
outall = pd.DataFrame([*sample])

# Column names as stored/returned, shown as the cell output.
variables = outall.columns.values
variables

array(['_id', 'dttm', 'petal_length', 'petal_width', 'sepal_length',
       'sepal_width', 'target'], dtype=object)

In [10]:
# Put the columns in the same order as `iris`, which also drops Mongo's `_id`.
# Selecting by name (rather than by hard-coded positions) keeps this correct
# whatever column order pandas infers from the returned documents; that
# order differs between pandas versions (alphabetical vs. key order).
outall = outall[list(iris.columns)]
outall.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target,dttm
0,5.1,3.5,1.4,0.2,setosa,2019-04-30 07:38:27.738
1,4.9,3.0,1.4,0.2,setosa,2019-04-30 07:38:27.738
2,4.7,3.2,1.3,0.2,setosa,2019-04-30 07:38:27.738
3,4.6,3.1,1.5,0.2,setosa,2019-04-30 07:38:27.738
4,5.0,3.6,1.4,0.2,setosa,2019-04-30 07:38:27.738


In [11]:
# The original in-memory frame, shown for a side-by-side comparison with
# the rows read back from MongoDB.
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target,dttm
0,5.1,3.5,1.4,0.2,setosa,2019-04-30 07:38:27.738940
1,4.9,3.0,1.4,0.2,setosa,2019-04-30 07:38:27.738940
2,4.7,3.2,1.3,0.2,setosa,2019-04-30 07:38:27.738940
3,4.6,3.1,1.5,0.2,setosa,2019-04-30 07:38:27.738940
4,5.0,3.6,1.4,0.2,setosa,2019-04-30 07:38:27.738940


In [12]:
# Round-trip check on the first five columns (the four measurements and the
# species label); `dttm` is left out of the comparison.
stored_values = outall.iloc[:, :5].values
source_values = iris.iloc[:, :5].values
assert np.all(stored_values == source_values), 'not equal'

In [13]:
# Clean up: drop the whole demo database so the notebook leaves no data behind.
op1.remove_db()

Database 'Iris_test' Removed!


In [14]:
# Close the MongoDB client now that the demo is finished.
op1.close_client()

Client Closed Successfully!
