# Using The Pandas Module With MongoDB

##### Make sure Pandas is installed in your mongodb_py Conda environment

```bash 
conda install pandas
```

In [1]:
import os

import pandas as pd
from pymongo import MongoClient

### Common Pandas Data Types

In [2]:
# A Series built from same-typed values gets a concrete numeric dtype.
s = pd.Series(data=[1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [3]:
# Mixing integers and a string makes pandas fall back to the generic object dtype.
s2 = pd.Series(data=[1, "test", 3])
s2

0       1
1    test
2       3
dtype: object

In [4]:
# A DataFrame from a list of rows; with no labels supplied, both axes get
# default integer labels.
df = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


### Indexes

In [5]:
# Scores per student: rows are labelled by student name, columns by assessment.
# The index is created with its name ("Student") up front.
df2 = pd.DataFrame(
    data=[[98, 84, 96], [75, 89, 92], [86, 91, 83]],
    index=pd.Index(["Tim", "Sally", "Mia"], name="Student"),
    columns=["Quiz 1", "Quiz 2", "Exam 1"],
)
df2

Unnamed: 0_level_0,Quiz 1,Quiz 2,Exam 1
Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tim,98,84,96
Sally,75,89,92
Mia,86,91,83


In [6]:
# Row labels of df2 (the index).
df2.index

Index(['Tim', 'Sally', 'Mia'], dtype='object', name='Student')

In [7]:
# Column labels of df2.
df2.columns

Index(['Quiz 1', 'Quiz 2', 'Exam 1'], dtype='object')

### Accessing Data

In [8]:
# Chained lookup: select the "Quiz 2" column first, then the "Tim" row label within it.
# This is fine for reading a value; for assignment use a single .loc[row, col] call.
df2["Quiz 2"]["Tim"]

84

In [9]:
# Label-based scalar lookup. Passing the row and column labels to .loc in one
# call avoids building an intermediate row Series (chained indexing) and is
# the form that also works reliably for assignment.
df2.loc["Tim", "Quiz 2"]

84

In [10]:
# Purely positional lookup: row position 0, column position 1.
df2.iloc[0,1]

84

In [11]:
# Label-based row lookup: returns the whole "Tim" row as a Series keyed by column name.
df2.loc["Tim"]

Quiz 1    98
Quiz 2    84
Exam 1    96
Name: Tim, dtype: int64

In [12]:
# All rows of the "Quiz 2" column, selected with a single .loc[rows, column]
# call rather than slicing the whole frame and then indexing the result.
df2.loc[:, "Quiz 2"]

Student
Tim      84
Sally    89
Mia      91
Name: Quiz 2, dtype: int64

### Set up MongoDB Client

In [13]:
# Connection settings. The defaults target a local MongoDB instance; set the
# MONGODB_URI / MONGODB_DB environment variables to point the notebook at a
# different server or database without editing this cell.
mongodb_uri = os.environ.get("MONGODB_URI", "mongodb://localhost:27017/")
db_name = os.environ.get("MONGODB_DB", "performance_db")

In [14]:
# Open the client connection and grab a handle to the working database.
client = MongoClient(host=mongodb_uri)
db = client.get_database(db_name)

### Converting a Pandas DataFrame to Dictionaries for Insertion

In [15]:
# Default orientation: {column -> {index label -> value}}.
df2.to_dict(orient="dict")

{'Quiz 1': {'Tim': 98, 'Sally': 75, 'Mia': 86},
 'Quiz 2': {'Tim': 84, 'Sally': 89, 'Mia': 91},
 'Exam 1': {'Tim': 96, 'Sally': 92, 'Mia': 83}}

In [16]:
# One list of values per column; the index labels are not included.
df2.to_dict(orient="list")

{'Quiz 1': [98, 75, 86], 'Quiz 2': [84, 89, 91], 'Exam 1': [96, 92, 83]}

In [17]:
# One dict per row. The index (student names) is not part of the records.
df2.to_dict(orient="records")

[{'Quiz 1': 98, 'Quiz 2': 84, 'Exam 1': 96},
 {'Quiz 1': 75, 'Quiz 2': 89, 'Exam 1': 92},
 {'Quiz 1': 86, 'Quiz 2': 91, 'Exam 1': 83}]

In [18]:
# Move the Student index into an ordinary column, leaving a default integer index.
df2.reset_index()

Unnamed: 0,Student,Quiz 1,Quiz 2,Exam 1
0,Tim,98,84,96
1,Sally,75,89,92
2,Mia,86,91,83


In [19]:
# With the index turned into a column first, each record also carries the student's name.
df2.reset_index().to_dict(orient="records")

[{'Student': 'Tim', 'Quiz 1': 98, 'Quiz 2': 84, 'Exam 1': 96},
 {'Student': 'Sally', 'Quiz 1': 75, 'Quiz 2': 89, 'Exam 1': 92},
 {'Student': 'Mia', 'Quiz 1': 86, 'Quiz 2': 91, 'Exam 1': 83}]

In [20]:
# Flatten the Student index into a column so each record carries the student's
# name, then insert one document per student into the `school` collection.
# NOTE(review): every run of this cell inserts the same students again, since
# no _id is supplied — confirm duplicate documents are acceptable here.
student_records = df2.reset_index(level=0).to_dict("records")
db.school.insert_many(student_records)

<pymongo.results.InsertManyResult at 0x2825581e300>

### Converting MongoDB Documents to a Pandas DataFrame

In [21]:
# Materialise five documents from the `zips` collection as a list of dicts.
list(db.zips.find(limit=5))

[{'_id': '01020',
  'city': 'CHICOPEE',
  'loc': [-72.576142, 42.176443],
  'pop': 31495,
  'state': 'MA'},
 {'_id': '01012',
  'city': 'CHESTERFIELD',
  'loc': [-72.833309, 42.38167],
  'pop': 177,
  'state': 'MA'},
 {'_id': '01010',
  'city': 'BRIMFIELD',
  'loc': [-72.188455, 42.116543],
  'pop': 3706,
  'state': 'MA'},
 {'_id': '01034',
  'city': 'TOLLAND',
  'loc': [-72.908793, 42.070234],
  'pop': 1652,
  'state': 'MA'},
 {'_id': '01032',
  'city': 'GOSHEN',
  'loc': [-72.844092, 42.466234],
  'pop': 122,
  'state': 'MA'}]

In [22]:
# Each document becomes one row; the document keys become the columns.
df3 = pd.DataFrame(data=list(db.zips.find(limit=5)))
df3

Unnamed: 0,_id,city,loc,pop,state
0,1020,CHICOPEE,"[-72.576142, 42.176443]",31495,MA
1,1012,CHESTERFIELD,"[-72.833309, 42.38167]",177,MA
2,1010,BRIMFIELD,"[-72.188455, 42.116543]",3706,MA
3,1034,TOLLAND,"[-72.908793, 42.070234]",1652,MA
4,1032,GOSHEN,"[-72.844092, 42.466234]",122,MA


In [23]:
# Use the `_id` column as the row index; this returns a new frame and leaves df3 unchanged.
df3.set_index(keys="_id")

Unnamed: 0_level_0,city,loc,pop,state
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1020,CHICOPEE,"[-72.576142, 42.176443]",31495,MA
1012,CHESTERFIELD,"[-72.833309, 42.38167]",177,MA
1010,BRIMFIELD,"[-72.188455, 42.116543]",3706,MA
1034,TOLLAND,"[-72.908793, 42.070234]",1652,MA
1032,GOSHEN,"[-72.844092, 42.466234]",122,MA
