In [1]:
import pandas as pd
import datetime as dt

In [2]:
loc_history = pd.read_json("data/Histórico de localização.json")

In [3]:
locations = pd.json_normalize(loc_history['locations'])

In [4]:
locations.head()

Unnamed: 0,timestampMs,latitudeE7,longitudeE7,accuracy,activity,source,deviceTag,altitude,verticalAccuracy,platform,platformType,locationMetadata,velocity,heading
0,1378508750990,-229917099,-433592736,984,"[{'timestampMs': '1378508517137', 'activity': ...",CELL,248904379,,,,,,,
1,1378508766022,-229917099,-433592736,984,"[{'timestampMs': '1378508772583', 'activity': ...",CELL,248904379,,,,,,,
2,1378509026816,-229917099,-433592736,984,"[{'timestampMs': '1378508969051', 'activity': ...",CELL,248904379,,,,,,,
3,1378509072785,-229949469,-433592859,5,,GPS,248904379,,,,,,,
4,1378509118790,-229958679,-433588200,5,,GPS,248904379,,,,,,,


In [5]:
locations['timestampMs'] = pd.to_numeric(locations['timestampMs'])
locations['latitudeE7'] = pd.to_numeric(locations['latitudeE7'])
locations['longitudeE7'] = pd.to_numeric(locations['longitudeE7'])

In [6]:
locations = locations[
    locations['timestampMs'] > dt.datetime.timestamp(dt.datetime.now() - dt.timedelta(days=30)) * 1000]

In [7]:
import requests
import time

In [8]:
####
# URL de conexão com a API CEP Aberto
####
with open('token.txt') as file:
    token = file.readline()

url = "https://www.cepaberto.com/api/v3/nearest"
headers = {'Authorization': 'Token token=' + token}

In [9]:
entries = pd.DataFrame()
initTimestamp = 0
initLatitude = 0
initLongitude = 0
for index, location in locations.iterrows():
    if initTimestamp == 0:
        initTimestamp = location['timestampMs']
        initLatitude = location['latitudeE7']
        initLongitude = location['longitudeE7']
    else:
        ####
        # Verifica se houve alguma mudança significativa na localização baseada nos dados de lat e lng fornecidos pelo GPS
        ####
        if abs(location['latitudeE7'] - initLatitude) > 50000 or abs(location['longitudeE7'] - initLongitude) > 50000:
            duration = (dt.datetime.fromtimestamp(location['timestampMs'] / 1000) - dt.datetime.fromtimestamp(
                initTimestamp / 1000)).total_seconds()
            ####
            # O usuário deve ter permanecido por pelo menos 30min no local para ser considerado
            ####
            if duration > 1800:
                ####
                # Formatando os dados de localização para se comunicar com a API do CEP Aberto
                ####
                latitude = location['latitudeE7'] / 10000000
                longitude = location['longitudeE7'] / 10000000
                response = requests.get(url, headers=headers, params={'lat': latitude, 'lng': longitude})
                cep = response.json()['cep']

                ####
                # Adicionando entrada à lista
                ####
                tmp = {
                    'data': dt.datetime.fromtimestamp(location['timestampMs'] / 1000).strftime("%Y/%m/%d, %H:%M:%S"),
                    'duration': duration,
                    'cep': cep
                }
                entries = entries.append(tmp, ignore_index=True, sort=False)

                ####
                # A API é free mas limitada à uma consulta por segundo
                ####
                time.sleep(1)

            initTimestamp = location['timestampMs']
            initLatitude = location['latitudeE7']
            initLongitude = location['longitudeE7']

In [10]:
entries.head()

Unnamed: 0,data,duration,cep
0,"2021/10/20, 11:09:12",207129.821,21815510
1,"2021/10/20, 12:14:12",3777.611,21810008
2,"2021/10/20, 15:54:55",13242.433,21715360
3,"2021/10/20, 17:15:38",4582.152,21715321
4,"2021/10/21, 08:32:54",54639.098,21715550


In [11]:
###
# Carregamento dos dados da prefeitura do Rio de Janeiro
###
covid_cases = pd.read_csv('data/Dados_CEP_MRJ_covid_19.csv')


  exec(code_obj, self.user_global_ns, self.user_ns)


In [12]:
###
# Convertendo os dados de data para objeto datetime e selecionando apenas os últimos 15 dias
###
covid_cases['dt_inicio_sintomas'] = pd.to_datetime(covid_cases['dt_inicio_sintomas'], dayfirst=True)
covid_cases[covid_cases['dt_inicio_sintomas'] > dt.datetime.now() - dt.timedelta(days=30)]

Unnamed: 0,dt_notific,dt_inicio_sintomas,bairro_resid__estadia,ap_residencia_estadia,evolucao,dt_evolucao,cep,data_atualizacao
6259,10/29/2021,2021-10-28,CAMPO GRANDE,5.2,óbito,11/16/2021,23055005,16/11/2021
6345,11/12/2021,2021-11-11,BANCARIOS,3.1,recuperado,11/15/2021,21910210,16/11/2021
6563,10/25/2021,2021-10-22,CACUIA,3.1,recuperado,11/12/2021,21921100,16/11/2021
6565,06/21/2021,2021-11-06,TAQUARA,4.0,recuperado,11/12/2021,22723021,16/11/2021
6634,04/18/2021,2021-11-04,IRAJA,3.3,recuperado,11/11/2021,21220530,16/11/2021
...,...,...,...,...,...,...,...,...
443963,05/27/2021,2021-12-05,REALENGO,5.1,Recuperado,,21725060,11/16/2021
444095,03/12/2021,2021-11-03,CATUMBI,1.0,Recuperado,,20251330,11/16/2021
444106,03/15/2021,2021-12-03,CATUMBI,1.0,Recuperado,,20251190,11/16/2021
444113,04/13/2021,2021-12-04,GUARATIBA,5.2,Recuperado,,23020000,11/16/2021


In [13]:
###
# Realizando a contagem de casos em cada CEP
###
relevant_covid_cases_cep = covid_cases[
    covid_cases['dt_inicio_sintomas'] > dt.datetime.now() - dt.timedelta(days=30)].groupby(['cep']).count()

relevant_covid_cases_cep = relevant_covid_cases_cep.reset_index()

cases_by_cep = pd.DataFrame()
cases_by_cep[['cep', 'cases']] = relevant_covid_cases_cep[['cep', 'dt_inicio_sintomas']]

In [14]:
###
# Cruzando os dados de permanência em cada CEP com a incidência de casos de COVID-19
###
fin = entries.merge(cases_by_cep, on='cep', how='left').fillna(0)

In [15]:
###
# print
###
fin


Unnamed: 0,data,duration,cep,cases
0,"2021/10/20, 11:09:12",207129.821,21815510,0.0
1,"2021/10/20, 12:14:12",3777.611,21810008,0.0
2,"2021/10/20, 15:54:55",13242.433,21715360,0.0
3,"2021/10/20, 17:15:38",4582.152,21715321,0.0
4,"2021/10/21, 08:32:54",54639.098,21715550,0.0
5,"2021/10/21, 09:09:14",2032.974,21721470,0.0
6,"2021/10/22, 17:23:17",115829.336,21815190,0.0
7,"2021/10/24, 14:45:51",161701.578,21810101,0.0
8,"2021/10/24, 16:13:11",5118.068,21715460,0.0
9,"2021/10/24, 17:40:18",4740.597,21810101,0.0
