# MongoDB Connection #

In [None]:
import os
from dotenv import load_dotenv
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
import pandas as pd
load_dotenv()

In [None]:
# Build the MongoDB Atlas connection URI from environment variables and
# verify the connection with a ping.
from urllib.parse import quote_plus

mongodb_user = os.getenv("MONGODB_USER")
# The password has historically been read from the misspelled variable
# "MONGODB_PASSOWORD"; keep honouring it so existing .env files still work,
# and accept the correctly spelled name as a fallback.
mongodb_password = os.getenv("MONGODB_PASSOWORD") or os.getenv("MONGODB_PASSWORD")
mongodb_cluster = os.getenv("MONGODB_CLUSTER")

# Fail early with a clear message instead of building a "None:None@None" URI.
missing_settings = [
    name
    for name, value in (
        ("MONGODB_USER", mongodb_user),
        ("MONGODB_PASSOWORD / MONGODB_PASSWORD", mongodb_password),
        ("MONGODB_CLUSTER", mongodb_cluster),
    )
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(missing_settings)
    )

# Credentials must be percent-escaped, otherwise characters such as "@", ":",
# "/" or "%" in the user name or password corrupt the connection string.
# NOTE(review): assumes the values in .env are stored unescaped -- confirm.
uri = (
    f"mongodb+srv://{quote_plus(mongodb_user)}:{quote_plus(mongodb_password)}"
    f"@{mongodb_cluster}.g7v04mw.mongodb.net/?retryWrites=true&w=majority"
)
client = MongoClient(uri, server_api=ServerApi('1'))

# Ping the deployment and report the outcome in the cell output; any error is
# printed rather than raised.
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

# Reading data #

In [None]:
# Handles to the "db_products" database and its "products" collection.
db = client.get_database("db_products")
collection = db.get_collection("products")

In [None]:
# Print every document in the collection (an empty filter matches all).
for document in collection.find({}):
    print(document)

Renaming the coordinate fields:

In [None]:
# Rename the "lat"/"lon" fields on every document (empty filter = all).
coordinate_renames = {"lat": "Latitude", "lon": "Longitude"}
collection.update_many({}, {"$rename": coordinate_renames})

In [None]:
# Fetch a single document so its fields can be inspected in the cell output.
collection.find_one()

# Applying transformations: filtering by category (books) #

Returning all categories in the database (unique):

In [None]:
# List the distinct values stored under the "Categoria do Produto" field.
collection.distinct("Categoria do Produto")

Query to get only the products whose category is "livros" (books):

In [None]:
# Select only the documents whose "Categoria do Produto" is "livros".
query = {"Categoria do Produto": "livros"}

# list() drains the cursor directly; no manual append loop is needed.
books_list = list(collection.find(query))

# Saving data to a dataframe #

In [None]:
# Build a DataFrame from the documents collected in books_list.
df_books = pd.DataFrame(books_list)
# Bare expression: displays the DataFrame as the notebook cell output.
df_books

# Formatting dates #
The dates are formatted as "dd/mm/yyyy", but they must be in the "yyyy-mm-dd" format in order to be accepted as dates in a MySQL database.

In [None]:
# Print the DataFrame summary to check the column dtypes before converting.
df_books.info()

The "Data da Compra" field is a string, so we must convert it to a datetime type:

In [None]:
# Parse the day/month/year strings into datetime values, then re-check dtypes.
purchase_dates = pd.to_datetime(df_books["Data da Compra"], format="%d/%m/%Y")
df_books["Data da Compra"] = purchase_dates
df_books.info()

In [None]:
# Render the datetime column back to text in year-month-day order.
iso_dates = df_books["Data da Compra"].dt.strftime("%Y-%m-%d")
df_books["Data da Compra"] = iso_dates
df_books

# Saving the data as a CSV file #

In [None]:
# Write the books table to CSV without the DataFrame index column.
books_csv_path = "../data/table_books.csv"
df_books.to_csv(books_csv_path, index=False)

# Applying transformations: filtering for products sold from 2021 onwards #

In [None]:
# Purchase dates are stored as text, so the year is matched with a regex:
# a "/" followed by a year between 2021 and 2029.
# NOTE(review): purchases dated 2030 or later would not match this pattern.
query = {"Data da Compra": {"$regex": "/202[1-9]"}}

# list() drains the cursor directly; no manual append loop is needed.
products_list = list(collection.find(query))

In [None]:
# Build a DataFrame from the documents collected in products_list.
df_products = pd.DataFrame(products_list)
# Show only the first rows as the notebook cell output.
df_products.head()

Formatting dates:

In [None]:
# Parse the day/month/year strings and immediately re-render them as
# year-month-day text.
df_products["Data da Compra"] = (
    pd.to_datetime(df_products["Data da Compra"], format="%d/%m/%Y")
    .dt.strftime("%Y-%m-%d")
)
df_products

Saving data to a CSV file:

In [12]:
# Write the filtered products table to CSV without the DataFrame index column.
products_csv_path = "../data/table_products_from_2021.csv"
df_products.to_csv(products_csv_path, index=False)

# Closing MongoDB connection #

In [13]:
# Close the MongoDB client now that all data has been exported.
client.close()