In [1]:
import pandas as pd
import datetime as dt

In [2]:
loc_history = pd.read_json("data/Histórico de localização.json")

In [3]:
loc_history.describe

<bound method NDFrame.describe of                                                  locations
0        {'timestampMs': '1378508750990', 'latitudeE7':...
1        {'timestampMs': '1378508766022', 'latitudeE7':...
2        {'timestampMs': '1378509026816', 'latitudeE7':...
3        {'timestampMs': '1378509072785', 'latitudeE7':...
4        {'timestampMs': '1378509118790', 'latitudeE7':...
...                                                    ...
1329815  {'timestampMs': '1635960303224', 'latitudeE7':...
1329816  {'timestampMs': '1635960483754', 'latitudeE7':...
1329817  {'timestampMs': '1635960664538', 'latitudeE7':...
1329818  {'timestampMs': '1635960844743', 'latitudeE7':...
1329819  {'timestampMs': '1635960914443', 'latitudeE7':...

[1329820 rows x 1 columns]>

In [4]:
locations = pd.json_normalize(loc_history['locations'])

In [5]:
locations['timestampMs'] = pd.to_numeric(locations['timestampMs'])
locations['latitudeE7'] = pd.to_numeric(locations['latitudeE7'])
locations['longitudeE7'] = pd.to_numeric(locations['longitudeE7'])

In [6]:
locations = locations[
    locations['timestampMs'] > dt.datetime.timestamp(dt.datetime.now() - dt.timedelta(days=15)) * 1000]

In [7]:
import requests
import time

In [8]:
####
# URL de conexão com a API CEP Aberto
####
with open('token.txt') as file:
    token = file.readline()

url = "https://www.cepaberto.com/api/v3/nearest"
headers = {'Authorization': 'Token token=' + token}

In [9]:
entries = pd.DataFrame()
initTimestamp = 0
initLatitude = 0
initLongitude = 0
for index, location in locations.iterrows():
    if initTimestamp == 0:
        initTimestamp = location['timestampMs']
        initLatitude = location['latitudeE7']
        initLongitude = location['longitudeE7']
    else:
        ####
        # Verifica se houve alguma mudança significativa na localização baseada nos dados de lat e lng fornecidos pelo GPS
        ####
        if abs(location['latitudeE7'] - initLatitude) > 50000 or abs(location['longitudeE7'] - initLongitude) > 50000:
            duration = (dt.datetime.fromtimestamp(location['timestampMs'] / 1000) - dt.datetime.fromtimestamp(
                initTimestamp / 1000)).total_seconds()
            ####
            # O usuário deve ter permanecido por pelo menos 30min no local para ser considerado
            ####
            if duration > 1800:
                ####
                # Formatando os dados de localização para se comunicar com a API do CEP Aberto
                ####
                latitude = location['latitudeE7'] / 10000000
                longitude = location['longitudeE7'] / 10000000
                response = requests.get(url, headers=headers, params={'lat': latitude, 'lng': longitude})
                cep = response.json()['cep']

                ####
                # Adicionando entrada à lista
                ####
                tmp = {
                    'data' : dt.datetime.fromtimestamp(location['timestampMs'] / 1000).strftime("%Y/%m/%d, %H:%M:%S"),
                    'duration' : duration,
                    'cep' : cep
                }
                entries = entries.append(tmp, ignore_index=True, sort=False)

                ####
                # A API é free mas limitada à uma consulta por segundo
                ####
                time.sleep(1)

            initTimestamp = location['timestampMs']
            initLatitude = location['latitudeE7']
            initLongitude = location['longitudeE7']

In [10]:
###
# Carregamento dos dados da prefeitura do Rio de Janeiro
###
covid_cases = pd.read_csv('data/Dados_CEP_MRJ_covid_19.csv')


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [11]:
###
# Convertendo os dados de data para objeto datetime e selecionando apenas os últimos 15 dias
###
covid_cases['dt_inicio_sintomas'] = pd.to_datetime(covid_cases['dt_inicio_sintomas'])
covid_cases[covid_cases['dt_inicio_sintomas'] > dt.datetime.now() - dt.timedelta(days=15)]

Unnamed: 0,dt_notific,dt_inicio_sintomas,bairro_resid__estadia,ap_residencia_estadia,evolucao,dt_evolucao,cep,data_atualizacao
7172,10/28/2021,2021-10-24,MARE,3.1,óbito,11/02/2021,21046240,04/11/2021
7233,10/30/2021,2021-10-30,CAMPO DOS AFONSOS,5.1,recuperado,11/01/2021,21740001,04/11/2021
7335,10/28/2021,2021-10-25,VIDIGAL,2.1,recuperado,10/30/2021,22450230,04/11/2021
7339,11/03/2021,2021-10-30,TODOS OS SANTOS,3.2,óbito,10/30/2021,20735060,04/11/2021
7450,11/03/2021,2021-10-28,TIJUCA,2.2,óbito,10/28/2021,20550000,04/11/2021
...,...,...,...,...,...,...,...,...
439866,10/28/2021,2021-10-24,MEIER,3.2,Ativo,,20735240,11/04/2021
439873,10/27/2021,2021-10-23,HIGIENOPOLIS,3.2,Ativo,,21050720,11/04/2021
439878,10/27/2021,2021-10-25,PENHA,3.1,Ativo,,21070600,11/04/2021
439881,10/27/2021,2021-10-24,CENTRO,1.0,Ativo,,20021270,11/04/2021


In [12]:
###
# Realizando a contagem de casos em cada CEP
###
relevant_covid_cases_cep = covid_cases[
    covid_cases['dt_inicio_sintomas'] > dt.datetime.now() - dt.timedelta(days=15)].groupby(['cep']).count()

relevant_covid_cases_cep = relevant_covid_cases_cep.reset_index()

cases_by_cep = pd.DataFrame()
cases_by_cep[['cep', 'cases']] = relevant_covid_cases_cep[['cep', 'dt_inicio_sintomas']]


In [13]:
###
# Cruzando os dados de permanência em cada CEP com a incidência de casos de COVID-19
###
fin = entries.merge(cases_by_cep, on='cep', how='left').fillna(0)

In [14]:
###
# print
###
fin

Unnamed: 0,cep,data,duration,cases
0,21815190,"2021/10/22, 17:23:17",93774.039,0.0
1,21810101,"2021/10/24, 14:45:51",161701.578,0.0
2,21715460,"2021/10/24, 16:13:11",5118.068,0.0
3,21810101,"2021/10/24, 17:40:18",4740.597,0.0
4,21810101,"2021/10/25, 11:18:48",63449.34,0.0
5,21715340,"2021/10/25, 19:23:56",15053.392,0.0
6,21820000,"2021/10/25, 21:05:13",2139.678,0.0
7,21820000,"2021/10/25, 21:43:27",2102.262,0.0
8,21775120,"2021/10/26, 09:33:15",41100.652,0.0
9,21775440,"2021/10/26, 11:12:41",5966.308,0.0
