<a href="https://colab.research.google.com/github/ysMarcos/projeto-python/blob/main/projeto_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports das Bibliotecas usadas

In [2]:
#Imports
import requests as req
import hashlib
import pandas as pd
import sqlite3
import json
from google.colab import userdata

# Funções utilizadas

In [3]:
# Builds the MD5 hash used to authenticate requests, following the Marvel API documentation
def hashToMD5(publicKey, privateKey, ts=1):
  """Return the MD5 hash object of ``ts + privateKey + publicKey``.

  Args:
    publicKey: Public API key; converted to ``str`` before hashing.
    privateKey: Private API key; converted to ``str`` before hashing.
    ts: Timestamp component placed first in the hashed string. Defaults to
      1, the value this function previously hard-coded. It has to be the
      same value that is sent as the ``ts`` request parameter.

  Returns:
    A ``hashlib`` MD5 hash object; call ``.hexdigest()`` on it to obtain
    the hexadecimal string.
  """
  # Concatenation order matters: timestamp, then private key, then public key.
  payload = str(ts) + str(privateKey) + str(publicKey)
  return hashlib.md5(payload.encode())

In [4]:
# Performs the HTTP GET requests used by the notebook
def get(url: str, endpoint: str, headers: dict, params: dict, timeout: float = 30):
  """Send a GET request to ``url + endpoint`` and return the decoded JSON body.

  Args:
    url: Base URL; ``endpoint`` is appended to it without any separator.
    endpoint: Path appended to ``url``.
    headers: HTTP headers sent with the request.
    params: Query-string parameters sent with the request.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout the underlying call may block indefinitely.

  Returns:
    The parsed JSON response, or ``None`` when the request or the JSON
    decoding fails (the error is printed). The HTTP status code is not
    checked, so an error response with a JSON body is returned as-is.
  """
  try:
    response = req.get(
        url = url + endpoint,
        headers = headers,
        params = params,
        timeout = timeout
      )
    return response.json()
  except Exception as e:
    # Best-effort behaviour kept from the original: report and signal failure with None.
    print(e)
    return None

In [23]:
# Maps the received records down to the requested keys.
# This mapper is generic and limited regarding typing; for a more complex,
# properly typed database it would be better to have one class per endpoint
# implementing the ETL methods.
def mapper(keys, values):
  """Project every record in ``values`` onto ``keys``.

  Args:
    keys: Names of the fields to keep from each record.
    values: Sequence of dict-like records; each one must contain every key.

  Returns:
    A list with one new dict per record, holding only ``keys``. Field values
    that are lists or dicts are serialized to JSON strings; everything else
    is copied unchanged.
  """
  def _flatten(field):
    # Nested containers are stored as JSON text so each cell holds a scalar.
    if isinstance(field, (list, dict)):
      return json.dumps(field)
    return field

  return [
    {key: _flatten(record[key]) for key in keys}
    for record in values
  ]

In [6]:
# Helpers that turn the given object into a DataFrame and persist it
def saveToCSV(obj, csvPath):
  """Write ``obj`` as a UTF-8 CSV file with a header row and no index column.

  Args:
    obj: Data accepted by ``pd.DataFrame`` (for example a list of dicts).
    csvPath: Destination path without extension; ``.csv`` is appended.
  """
  target = csvPath + '.csv'
  frame = pd.DataFrame(obj)
  frame.to_csv(target, header = True, index = False, encoding = 'utf-8')

def saveToSqlite(obj, tableName, dbPath='bd.db'):
  """Append ``obj`` to a SQLite table, then rewrite the table without duplicate rows.

  Args:
    obj: Data accepted by ``pd.DataFrame`` (for example a list of dicts).
    tableName: Name of the destination table; created if it does not exist.
    dbPath: Path of the SQLite database file. Defaults to ``'bd.db'``, the
      file this function previously hard-coded.

  Returns:
    None. When ``obj`` yields a frame with no columns nothing is written.
  """
  df = pd.DataFrame(obj)
  if df.columns.empty:
    # Nothing to persist; a table cannot be created without columns.
    return
  con = sqlite3.connect(dbPath)
  try:
    df.to_sql(tableName, con, if_exists='append', index=False)
    # Remove duplicates by re-reading the whole table; may be slow on large volumes.
    # The table already contains the rows appended above, so it is the only input needed.
    quotedName = '"' + tableName.replace('"', '""') + '"'
    stored = pd.read_sql('SELECT * FROM ' + quotedName, con)
    deduped = stored.drop_duplicates(ignore_index=True)
    deduped.to_sql(tableName, con, if_exists='replace', index=False)
  finally:
    # Always release the connection, even when a read or write fails.
    con.close()

# Endpoints

In [21]:
# Variables shared by every Marvel endpoint call in this notebook.
# HTTPS is used so the API key and hash sent in the query string are not
# transmitted in clear text.
url = 'https://gateway.marvel.com/v1/public/'
# Credentials are read from the Colab secrets store instead of being hard-coded.
publicKey = userdata.get('publicKey')
privateKey = userdata.get('privateKey')
# The hash is built with timestamp 1, so "ts" below has to stay 1 as well.
securityHash = hashToMD5(publicKey, privateKey).hexdigest()

headers = {
    'Accept': "*/*"
}

params = {
    "ts": 1,
    "apikey": publicKey,
    "hash": securityHash,
    "limit": 100
}

## Characters

In [12]:
endpoint = 'characters'

# Fetch the endpoint payload, then keep only the list of records.
charactersResponse = get(url, endpoint, headers, params)
result = charactersResponse['data']['results']
# Keep only the columns of interest before persisting.
characters = mapper(['id', 'name'], result)
# Persist the same records to <endpoint>.csv and to the SQLite table named after the endpoint.
saveToCSV(characters, endpoint)
saveToSqlite(obj = characters, tableName = endpoint)

## Comics

In [24]:
endpoint = 'comics'

# Fetch the endpoint payload, then keep only the list of records.
comicsResponse = get(url, endpoint, headers, params)
result = comicsResponse['data']['results']
# List/dict fields among these columns are serialized to JSON strings by mapper.
comicsColumns = ['id', 'digitalId', 'title', 'issueNumber', 'dates', 'prices', 'resourceURI']
comics = mapper(comicsColumns, result)
# Persist the same records to <endpoint>.csv and to the SQLite table named after the endpoint.
saveToCSV(comics, endpoint)
saveToSqlite(obj = comics, tableName = endpoint)

[{'id': 82967, 'digitalId': 0, 'title': 'Marvel Previews (2017)', 'issueNumber': 0, 'dates': '[{"type": "onsaleDate", "date": "2099-10-30T00:00:00-0500"}, {"type": "focDate", "date": "2019-10-07T00:00:00-0400"}]', 'prices': '[{"type": "printPrice", "price": 0}]', 'resourceURI': 'http://gateway.marvel.com/v1/public/comics/82967'}, {'id': 82965, 'digitalId': 0, 'title': 'Marvel Previews (2017)', 'issueNumber': 0, 'dates': '[{"type": "onsaleDate", "date": "2099-08-28T00:00:00-0500"}, {"type": "focDate", "date": "2019-08-05T00:00:00-0400"}]', 'prices': '[{"type": "printPrice", "price": 0}]', 'resourceURI': 'http://gateway.marvel.com/v1/public/comics/82965'}, {'id': 82970, 'digitalId': 52952, 'title': 'Marvel Previews (2017)', 'issueNumber': 0, 'dates': '[{"type": "onsaleDate", "date": "2099-01-29T00:00:00-0500"}, {"type": "focDate", "date": "2020-01-06T00:00:00-0500"}, {"type": "unlimitedDate", "date": "2020-01-29T00:00:00-0500"}, {"type": "digitalPurchaseDate", "date": "2020-01-29T00:00:0

## Events

In [25]:
endpoint = 'events'

# Fetch the endpoint payload, then keep only the list of records.
eventsResponse = get(url, endpoint, headers, params)
result = eventsResponse['data']['results']
# Keep only the columns of interest before persisting.
eventsColumns = ['id', 'title', 'description', 'resourceURI', 'start', 'end']
events = mapper(eventsColumns, result)
# Persist the same records to <endpoint>.csv and to the SQLite table named after the endpoint.
saveToCSV(events, endpoint)
saveToSqlite(obj = events, tableName = endpoint)