# MongoDB Connection #

In [None]:
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
# Load variables from a local .env file into the environment so the
# os.getenv() lookups in the connection cell can read them.
load_dotenv()

In [None]:
# Read the Atlas credentials and cluster name from the environment.
mongodb_user = os.getenv("MONGODB_USER")
# Prefer the correctly spelled variable, but fall back to the legacy misspelled
# name so existing .env files keep working.
mongodb_password = os.getenv("MONGODB_PASSWORD") or os.getenv("MONGODB_PASSOWORD")
mongodb_cluster = os.getenv("MONGODB_CLUSTER")

# Fail early with a readable message instead of silently building a URI that
# contains the literal text "None".
missing = [
    name
    for name, value in (
        ("MONGODB_USER", mongodb_user),
        ("MONGODB_PASSWORD", mongodb_password),
        ("MONGODB_CLUSTER", mongodb_cluster),
    )
    if not value
]
if missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing)
    )

# Credentials embedded in a MongoDB URI must be percent-escaped, otherwise
# characters such as '@', ':' or '/' break URI parsing.
# NOTE(review): assumes the .env values are stored raw (not already escaped).
uri = (
    f"mongodb+srv://{quote_plus(mongodb_user)}:{quote_plus(mongodb_password)}"
    f"@{mongodb_cluster}.g7v04mw.mongodb.net/?retryWrites=true&w=majority"
)
client = MongoClient(uri, server_api=ServerApi('1'))

# Ping the deployment to confirm the connection works; a failure is reported
# but does not stop the notebook.
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

# Reading data #

In [None]:
# Names of the database and collection that hold the product documents.
DB_NAME = "db_products"
COLLECTION_NAME = "products"

# Get handles to the database and to its products collection.
db = client[DB_NAME]
collection = db[COLLECTION_NAME]

In [None]:
# Preview only a handful of documents: printing the whole collection floods
# the cell output and fetches every document from the server.
PREVIEW_LIMIT = 5

for doc in collection.find().limit(PREVIEW_LIMIT):
    print(doc)

Renaming the coordinate fields (`lat` → `Latitude`, `lon` → `Longitude`):

In [None]:
# Old field name -> new field name for the coordinate fields.
coordinate_renames = {"lat": "Latitude", "lon": "Longitude"}
# The empty filter matches every document in the collection.
collection.update_many({}, {"$rename": coordinate_renames})

In [None]:
# Fetch a single document so its field names can be inspected.
collection.find_one()

# Applying transformations: filtering by category (books) #

Listing the distinct product categories stored in the collection:

In [None]:
# Return the unique values of the "Categoria do Produto" field.
collection.distinct("Categoria do Produto")

Query to get only the products whose category is "livros" (books):

In [None]:
# Match only the documents whose category is "livros".
query = {"Categoria do Produto": "livros"}
# Materialize the cursor into a list of matching documents.
books_list = list(collection.find(query))

# Saving data to a dataframe #

In [None]:
import pandas as pd

In [None]:
# Build a DataFrame from the list of book documents and display it.
df_books = pd.DataFrame(data=books_list)
df_books

# Formatting dates #
The dates are formatted as "dd/mm/yyyy", but they must be in "yyyy-mm-dd" format to be accepted as dates in a MySQL database.

In [None]:
# Print the column names, non-null counts and dtypes of the DataFrame.
df_books.info()

The "Data da Compra" field is a string, so we must convert it to a datetime type:

In [None]:
# Parse the day/month/year strings in "Data da Compra" into datetime values.
purchase_dates = pd.to_datetime(df_books["Data da Compra"], format="%d/%m/%Y")
df_books["Data da Compra"] = purchase_dates
# Show the column summary again to check the new dtype.
df_books.info()

In [13]:
# Render the datetime values back to text in "yyyy-mm-dd" form.
iso_dates = df_books["Data da Compra"].dt.strftime("%Y-%m-%d")
df_books["Data da Compra"] = iso_dates
df_books

Unnamed: 0,_id,Produto,Categoria do Produto,Preço,Frete,Data da Compra,Vendedor,Local da compra,Avaliação da compra,Tipo de pagamento,Quantidade de parcelas,Latitude,Longitude
0,65cfdf2e0db5537846c1cc94,Modelagem preditiva,livros,92.45,5.609697,2020-01-01,Thiago Silva,BA,1,cartao_credito,3,-13.29,-41.71
1,65cfdf2e0db5537846c1cc95,Iniciando em programação,livros,43.84,0.000000,2020-01-01,Mariana Ferreira,SP,5,cartao_credito,1,-22.19,-48.79
2,65cfdf2e0db5537846c1cca7,Iniciando em programação,livros,63.25,3.894137,2022-01-01,Juliana Costa,RJ,5,cartao_credito,4,-22.25,-42.66
3,65cfdf2e0db5537846c1ccb5,Ciência de dados com python,livros,86.13,5.273176,2021-02-01,Camila Ribeiro,RJ,4,cartao_credito,3,-22.25,-42.66
4,65cfdf2e0db5537846c1ccb7,Ciência de dados com python,livros,72.75,1.458158,2021-02-01,Beatriz Moraes,PR,4,cartao_credito,2,-24.89,-51.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...
737,65cfdf2e0db5537846c1f12f,Dashboards com Power BI,livros,41.27,2.728608,2022-07-31,João Souza,SC,5,cartao_credito,12,-27.45,-50.95
738,65cfdf2e0db5537846c1f133,Ciência de dados com python,livros,81.58,4.632666,2020-08-31,Juliana Costa,SP,5,cartao_credito,2,-22.19,-48.79
739,65cfdf2e0db5537846c1f139,Modelagem preditiva,livros,109.17,6.701389,2021-08-31,Felipe Santos,MG,3,cartao_credito,3,-18.10,-44.38
740,65cfdf2e0db5537846c1f145,Modelagem preditiva,livros,94.89,5.578872,2022-08-31,Pedro Gomes,SP,5,cartao_credito,1,-22.19,-48.79


# Saving the data as a CSV file #

In [14]:
# Create the target folder first so the export does not fail when the data
# directory is missing (e.g. on a fresh checkout).
output_path = "../data/table_books.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# index=False keeps the DataFrame's row index out of the CSV file.
df_books.to_csv(output_path, index=False)