In [33]:
# Optional dependency installation (uncomment on a fresh environment).
# NOTE(review): the cells below call the h3 v3 function names (geo_to_h3, k_ring,
# h3_to_geo, h3_indexes_are_neighbors). These were renamed in h3 v4, so an
# unpinned install may not work -- consider pinning, e.g. `%pip install "h3<4"`.
# !pip install h3
# !pip install plotly
# !pip install haversine
# !pip install pyarrow

In [25]:
import h3
import numpy as np
import pandas as pd
import plotly.express as px
import scipy.sparse as sparse
from haversine import haversine
import matplotlib.pyplot as plt
from datetime import datetime, timezone

# Notebook-wide display options: floats rendered with 4 decimals and long
# cell contents (up to 1000 characters) shown without truncation.
pd.set_option('display.float_format', '{:.4f}'.format)
pd.set_option('display.max_colwidth', 1000)

In [26]:
# Load the raw check-ins from the working directory.
checkins = pd.read_csv('checkins.csv')
# State label; it is interpolated into the names of the CSV files exported below.
estado = 'Minnesota'

In [27]:
checkins['userid'].nunique()

127

In [28]:
# Parse the timestamp column in place; later cells sort by it and call
# .weekday() / .hour on its values.
checkins['local_datetime'] = pd.to_datetime(checkins['local_datetime'])

# Region View

In [29]:
def generate_h3_cell(row, resolution=10):
    """Return the H3 cell index that contains a check-in's coordinates.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'latitude'`` and ``'longitude'`` entries.
    resolution : int, default 10
        H3 resolution forwarded to ``h3.geo_to_h3``. The default is the value
        that used to be hard-coded, so ``df.apply(generate_h3_cell, axis=1)``
        behaves exactly as before.

    Returns
    -------
    Whatever ``h3.geo_to_h3`` returns for the given point and resolution.
    """
    return h3.geo_to_h3(row['latitude'], row['longitude'], resolution)

In [30]:
# Tag every check-in with the H3 cell computed from its row.
checkins['h3_cell'] = checkins.apply(generate_h3_cell, axis=1)

# Number of distinct regions present in the data set.
checkins['h3_cell'].nunique()

4671

In [7]:
# (
#     checkins
#     .groupby('userid')
#     .agg(
#         qt_regioes_visitadas=('h3_cell', 'nunique'), 
#         frequencia_visitas_regioes=('h3_cell', 'count'), 
#         qt_localizacoes_visitadas=('placeid', 'nunique')
#     )
#     .sort_values(by=['qt_regioes_visitadas'], ascending=False)
# )

Unnamed: 0_level_0,qt_regioes_visitadas,frequencia_visitas_regioes,qt_localizacoes_visitadas
userid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9298,9498,26988,15969
18382,8831,19569,13281
136677,8370,14467,13008
1531870,6096,9112,8638
202890,5003,9728,7880
...,...,...,...
58657,2,2,2
57784,2,2,2
68022,2,4,2
21919,1,3,3


In [None]:
# px.box(
#     (
#         checkins
#         .groupby('userid')
#         .agg(
#             qt_regioes_visitadas=('h3_cell', 'nunique'), 
#             frequencia_visitas_regioes=('h3_cell', 'count'), 
#             qt_localizacoes_visitadas=('placeid', 'nunique')
#         )
#         .sort_values(by=['qt_regioes_visitadas'], ascending=False)
#     ),
#     y='qt_regioes_visitadas'
# )

## Region - region (distance)

**Entrada:**
Check-ins com **região h3**

**Saída:**
**Matriz de features** n x n entre as regiões, na qual **n** representa o número de regiões e cada elemento **i j** representa **a distância, em metros, da região i até a região j**.

É necessário também pegar o de-para das categorias que o Cláudio usa, de forma a usar um valor inteiro, ao invés do nome da categoria

In [31]:
def h3_centroid_distance(cell_h3_1, cell_h3_2):
    """Distance in meters between the centroids of two H3 cells.

    Each cell is converted to a centroid with ``h3.h3_to_geo``; the pair is
    handed to ``haversine`` and the result is multiplied by 1000
    (presumably kilometers -> meters, haversine's default unit).
    """
    centroids = [h3.h3_to_geo(cell) for cell in (cell_h3_1, cell_h3_2)]
    return haversine(*centroids) * 1000

In [32]:
def generate_user_region_distance_matrix(checkins, k=3):
    """Build, for every user, the pairwise distance matrix of their regions.

    A user's region set contains the H3 cells the user checked in at plus every
    cell returned by ``h3.k_ring(cell, k)`` for those cells that also occurs
    somewhere in ``checkins`` (i.e. was visited by any user).

    Parameters
    ----------
    checkins : pandas.DataFrame
        Needs the columns ``'userid'``, ``'h3_cell'`` and ``'local_datetime'``.
    k : int, default 3
        Ring distance forwarded to ``h3.k_ring``; the default is the value that
        was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per user (in order of first appearance) with the columns
        ``'userid'`` and ``'matrices'``. ``'matrices'`` holds the ``str()`` of
        an n x n nested list whose element ``[i][j]`` is
        ``h3_centroid_distance(region_i, region_j)`` rounded to 2 decimals.

    Notes
    -----
    The row/column order of each matrix comes from iterating a ``set`` and is
    therefore arbitrary; the cell-to-index mapping is not part of the output.
    """
    all_regions_set = set(checkins['h3_cell'].unique().tolist())

    # Collect plain records and build the frame once at the end: concatenating
    # onto a DataFrame inside the loop re-copies all previous rows every time.
    records = []

    for user in checkins['userid'].unique():
        user_checkins = checkins[checkins['userid'] == user].sort_values(by='local_datetime')
        visited_regions = user_checkins['h3_cell'].unique().tolist()

        # Visited cells plus their k-ring neighbours that exist in the data set.
        candidate_regions = list(visited_regions)
        for region in visited_regions:
            candidate_regions.extend(all_regions_set & set(h3.k_ring(region, k)))
        user_regions = list(set(candidate_regions))

        distance_matrix = [
            [round(h3_centroid_distance(origin, target), 2) for target in user_regions]
            for origin in user_regions
        ]

        records.append({'userid': user, 'matrices': str(distance_matrix)})

    return pd.DataFrame(records, columns=['userid', 'matrices'])

In [33]:
# Build the per-user region distance matrices for every user in `checkins`.
users_regions_distance = generate_user_region_distance_matrix(checkins)

In [None]:
users_regions_distance.head(3)

Unnamed: 0,userid,matrices
0,1338,"[[0.0, 3927.26, 15115.3, 2481.7, 133.36, 474.76, 136.69, 6496.41, 17974.27, 3728.03, 15802.53, 15599.74, 15585.19, 2371.6, 357.13, 2397.65, 18034.6, 15211.14, 47440.86, 15325.75, 49164.07, 6626.18], [3927.26, 0.0, 19037.29, 1967.44, 3853.31, 4372.9, 3983.16, 10368.84, 21894.05, 1818.95, 19729.4, 19526.24, 19510.28, 2086.48, 3699.63, 3816.56, 21949.4, 19137.81, 48016.38, 19251.01, 49607.89, 10499.14], [15115.3, 19037.29, 0.0, 17345.39, 15184.74, 14664.51, 15054.67, 8752.04, 2863.69, 18583.0, 948.19, 735.36, 568.98, 17216.08, 15369.76, 16234.18, 2960.49, 592.45, 47374.95, 399.63, 49499.95, 8623.78], [2481.7, 1967.44, 17345.39, 0.0, 2368.16, 2832.07, 2482.0, 8612.49, 20188.37, 2944.32, 18072.54, 17864.23, 17835.14, 130.42, 2402.0, 3573.32, 20227.23, 17478.75, 46708.68, 17577.6, 48352.33, 8742.35], [133.36, 3853.31, 15184.74, 2368.16, 0.0, 521.55, 130.39, 6548.3, 18042.5, 3713.75, 15876.69, 15673.22, 15656.98, 2256.09, 410.07, 2476.49, 18100.56, 15284.91, 47339.66, 15397.71, 49060.09, ..."
1,162577,"[[0.0, 1736.67, 5489.06, 474.69, 3356.28, 3745.96, 596.69, 1749.84, 1271.19, 1627.7, 4885.63, 666.75, 16447.5, 5462.25, 14118.28, 462.93, 2142.34, 2080.29, 5015.9, 3739.6, 3909.85, 16490.94, 13732.84, 3982.59, 46413.85, 13831.88, 3651.05], [1736.67, 0.0, 3927.26, 1656.44, 1695.21, 2481.7, 1507.46, 133.36, 474.76, 136.69, 6496.41, 1822.14, 17974.27, 3728.03, 15599.74, 1277.93, 1794.18, 357.13, 6626.18, 2086.46, 2397.65, 18034.6, 15211.14, 2608.33, 47440.86, 15325.75, 2184.14], [5489.06, 3927.26, 0.0, 5202.07, 2253.65, 1967.44, 5413.15, 3853.31, 4372.9, 3983.16, 10368.84, 5241.02, 21894.05, 1818.95, 19526.24, 5085.94, 3850.65, 3699.63, 10499.14, 1872.91, 3816.56, 21949.4, 19137.81, 1628.29, 48016.38, 19251.01, 1848.11], [474.69, 1656.44, 5202.07, 0.0, 3149.99, 3391.7, 973.88, 1633.06, 1245.01, 1528.61, 5221.23, 236.5, 16800.2, 5318.0, 14489.31, 596.71, 1699.07, 2013.47, 5351.01, 3520.73, 3978.2, 16836.68, 14105.28, 3651.18, 46129.47, 14198.87, 3354.26], [3356.28, 1695.21, 2253.65, 31..."
2,174066,"[[0.0, 1331.67, 1736.67, 5489.06, 3745.96, 3356.28, 474.69, 596.69, 1749.84, 1668.37, 1271.19, 1627.7, 4885.63, 666.75, 16447.5, 1247.5, 5462.25, 462.93, 3617.23, 2142.34, 2080.29, 5015.9, 3739.6, 3909.85, 16490.94, 3982.59, 46413.85, 3651.05], [1331.67, 0.0, 3046.26, 6810.92, 5022.09, 4687.38, 1630.39, 1620.14, 3071.08, 2081.18, 2572.65, 2945.72, 3591.23, 1657.58, 15171.82, 136.67, 6772.9, 1772.74, 4892.01, 3276.26, 3377.07, 3720.92, 5071.19, 5045.13, 15206.89, 5278.61, 45942.16, 4967.19], [1736.67, 3046.26, 0.0, 3927.26, 2481.7, 1695.21, 1656.44, 1507.46, 133.36, 2733.71, 474.76, 136.69, 6496.41, 1822.14, 17974.27, 2945.74, 3728.03, 1277.93, 2371.6, 1794.18, 357.13, 6626.18, 2086.46, 2397.65, 18034.6, 2608.33, 47440.86, 2184.14], [5489.06, 6810.92, 3927.26, 0.0, 1967.44, 2253.65, 5202.07, 5413.15, 3853.31, 5600.52, 4372.9, 3983.16, 10368.84, 5241.02, 21894.05, 6735.2, 1818.95, 5085.94, 2086.48, 3850.65, 3699.63, 10499.14, 1872.91, 3816.56, 21949.4, 1628.29, 48016.38, 1848.11], [3..."


In [None]:
users_regions_distance.shape

(59, 2)

In [None]:
# Persist the per-user distance matrices; `estado` is part of the file name.
users_regions_distance[['userid', 'matrices']].to_csv(f'region_distance_feature_{estado}.csv', index=False)

## Region - region (adjacency matrix)

**Entrada:**
Check-ins com **região h3**

**Saída:**
**Matriz de adjacência** n x n entre as regiões, na qual **n** representa o número de regiões e cada elemento **i j** indica se **a região j sucede a região i, de acordo com a data** do check-in. O valor em **i j** é ponderado, **indicando o número de vezes que j sucede i**.

In [None]:
def generate_user_region_adjacency_matrix(checkins, k=3):
    """Build, for every user, the weighted region-transition matrix.

    A user's region set contains the H3 cells the user checked in at plus every
    cell returned by ``h3.k_ring(cell, k)`` for those cells that also occurs
    somewhere in ``checkins``. The user's check-ins are ordered by
    ``'local_datetime'`` and each pair of consecutive check-ins (current cell,
    next cell) adds 1 to the corresponding matrix entry.

    Parameters
    ----------
    checkins : pandas.DataFrame
        Needs the columns ``'userid'``, ``'h3_cell'`` and ``'local_datetime'``.
    k : int, default 3
        Ring distance forwarded to ``h3.k_ring``; the default is the value that
        was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per user (in order of first appearance) with the columns
        ``'userid'`` and ``'matrices'``. ``'matrices'`` holds the ``str()`` of
        an n x n nested list of integers where ``[i][j]`` is the number of
        times region ``j`` directly follows region ``i``.

    Notes
    -----
    The row/column order of each matrix comes from iterating a ``set`` and is
    therefore arbitrary; the cell-to-index mapping is not part of the output.
    NOTE(review): ``sort_values`` is called with its default sort algorithm, so
    check-ins sharing the same timestamp may end up in either order -- confirm
    whether ties matter for this data set.
    """
    all_regions_set = set(checkins['h3_cell'].unique().tolist())

    # Collect plain records and build the frame once at the end: concatenating
    # onto a DataFrame inside the loop re-copies all previous rows every time.
    records = []

    for user in checkins['userid'].unique():
        user_checkins = checkins[checkins['userid'] == user].sort_values(by='local_datetime')
        visit_sequence = user_checkins['h3_cell'].tolist()
        visited_regions = user_checkins['h3_cell'].unique().tolist()

        # Visited cells plus their k-ring neighbours that exist in the data set.
        candidate_regions = list(visited_regions)
        for region in visited_regions:
            candidate_regions.extend(all_regions_set & set(h3.k_ring(region, k)))
        user_regions = list(set(candidate_regions))

        # Count transitions in a plain nested list addressed through a
        # cell -> position lookup instead of label-indexing a DataFrame per step.
        position = {region: idx for idx, region in enumerate(user_regions)}
        size = len(user_regions)
        transition_counts = [[0] * size for _ in range(size)]
        for current_cell, next_cell in zip(visit_sequence, visit_sequence[1:]):
            transition_counts[position[current_cell]][position[next_cell]] += 1

        records.append({'userid': user, 'matrices': str(transition_counts)})

    return pd.DataFrame(records, columns=['userid', 'matrices'])

In [None]:
# Build the per-user region transition (adjacency) matrices for every user in `checkins`.
users_regions_adjacency_matrix = generate_user_region_adjacency_matrix(checkins)

In [None]:
users_regions_adjacency_matrix.shape

(59, 2)

In [None]:
# Persist the per-user transition matrices; `estado` is part of the file name.
users_regions_adjacency_matrix[['userid', 'matrices']].to_csv(f'region_adjacency_matrix_{estado}.csv', index=False)

## Region - region (adjacency feature)

**Entrada:**
Check-ins com **região h3**

**Saída:**
**Matriz de features** n x n entre as regiões, na qual **n** representa o número de regiões e cada elemento **i j** indica se a região i é geograficamente adjacente à região j.

In [None]:
def generate_user_region_adjacency_feature(checkins, k=3):
    """Build, for every user, the geographic neighbourhood matrix of their regions.

    A user's region set contains the H3 cells the user checked in at plus every
    cell returned by ``h3.k_ring(cell, k)`` for those cells that also occurs
    somewhere in ``checkins``.

    Parameters
    ----------
    checkins : pandas.DataFrame
        Needs the columns ``'userid'``, ``'h3_cell'`` and ``'local_datetime'``.
    k : int, default 3
        Ring distance forwarded to ``h3.k_ring``; the default is the value that
        was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per user (in order of first appearance) with the columns
        ``'userid'`` and ``'matrices'``. ``'matrices'`` holds the ``str()`` of
        an n x n nested list where ``[i][j]`` is 1 when
        ``h3.h3_indexes_are_neighbors(region_i, region_j)`` is truthy, else 0.

    Notes
    -----
    The row/column order of each matrix comes from iterating a ``set`` and is
    therefore arbitrary; the cell-to-index mapping is not part of the output.
    """
    all_regions_set = set(checkins['h3_cell'].unique().tolist())

    # Collect plain records and build the frame once at the end: concatenating
    # onto a DataFrame inside the loop re-copies all previous rows every time.
    records = []

    for user in checkins['userid'].unique():
        user_checkins = checkins[checkins['userid'] == user].sort_values(by='local_datetime')
        visited_regions = user_checkins['h3_cell'].unique().tolist()

        # Visited cells plus their k-ring neighbours that exist in the data set.
        candidate_regions = list(visited_regions)
        for region in visited_regions:
            candidate_regions.extend(all_regions_set & set(h3.k_ring(region, k)))
        user_regions = list(set(candidate_regions))

        adjacency_matrix = [
            [1 if h3.h3_indexes_are_neighbors(origin, target) else 0 for target in user_regions]
            for origin in user_regions
        ]

        records.append({'userid': user, 'matrices': str(adjacency_matrix)})

    return pd.DataFrame(records, columns=['userid', 'matrices'])

In [None]:
# Build the per-user geographic neighbourhood matrices for every user in `checkins`.
users_regions_adjacency_feature = generate_user_region_adjacency_feature(checkins)

In [None]:
users_regions_adjacency_feature.shape

(59, 2)

In [None]:
# Persist the per-user neighbourhood matrices; `estado` is part of the file name.
users_regions_adjacency_feature[['userid', 'matrices']].to_csv(f'region_adjacency_feature_{estado}.csv', index=False)

## Region - POI (checkins quantity)

**Entrada:**

Checkins com uma **região h3** associada à latitude e longitude 

**Saída:**
**Matriz de features** contendo em cada linha **a região h3** e em cada coluna a **quantidade de check-ins em uma dada categoria de POI**.

In [119]:
# NOTE(review): `checkins_filtrados` is not created by any cell visible above;
# define it before this cell so the notebook survives Restart & Run All.
# Check-in counts per (H3 region, category), with the category columns
# relabelled as plain strings and the rows ordered by region.
feature_matrix_poi_region_qt = (
    pd.crosstab(
        index=checkins_filtrados['h3_cell'],
        columns=checkins_filtrados['category'],
    )
    .pipe(lambda counts: counts.set_axis([f'{col}' for col in counts.columns], axis=1))
    .sort_index()
)

In [120]:
feature_matrix_poi_region_qt

Unnamed: 0_level_0,0,1,2,3,4,5,6
h3_cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
8a264902914ffff,0,2,0,0,0,0,0
8a264904a0a7fff,0,0,1,0,0,0,0
8a264904a31ffff,0,0,1,0,0,0,0
8a264904a8cffff,0,0,1,0,0,0,0
8a264904aadffff,2,0,0,0,0,0,0
...,...,...,...,...,...,...,...
8a44eeb6b407fff,1,0,0,0,0,0,0
8a44eeb6b72ffff,2,0,0,0,0,0,0
8a44eeb6bc6ffff,0,0,1,0,0,0,0
8a44eeb6bd5ffff,0,0,0,1,0,0,0


## Region - POI (poi quantity)

**Entrada:**

Checkins com uma **região h3**.

**Saída:**
**Matriz de features** contendo em cada linha **a região h3** e em cada coluna a **quantidade de POIs existentes** em cada categoria.

In [126]:
checkins_filtrados

Unnamed: 0,userid,category,placeid,local_datetime,latitude,longitude,country_name,state_name,h3_cell
2726,112,3,18219,2010-03-13 23:18:16+00:00,33.5798,-86.0694,United States,Alabama,8a44e8689347fff
3252,112,0,38943,2010-10-30 00:27:18+00:00,33.5426,-86.5922,United States,Alabama,8a44e821a99ffff
3251,112,0,38943,2011-03-12 21:02:39+00:00,33.5426,-86.5922,United States,Alabama,8a44e821a99ffff
3260,112,0,6343095,2010-10-29 22:29:32+00:00,33.5493,-86.5958,United States,Alabama,8a44e821ab97fff
2926,112,5,4056109,2011-01-29 18:20:35+00:00,33.5101,-86.8102,United States,Alabama,8a44e8359907fff
...,...,...,...,...,...,...,...,...,...
3263,828903,0,370321,2010-10-02 17:42:25+00:00,33.9977,-86.0911,United States,Alabama,8a44ebc4b217fff
537,828903,2,737252,2010-10-03 14:40:25+00:00,33.5672,-86.5196,United States,Alabama,8a44e828c157fff
538,828903,2,525500,2010-10-02 16:46:07+00:00,33.9977,-86.0890,United States,Alabama,8a44ebc4b2a7fff
44,1331047,4,27859,2011-01-31 02:31:47+00:00,34.6463,-86.7750,United States,Alabama,8a2649356507fff


In [129]:
checkins_filtrados[checkins_filtrados['h3_cell'] == '8a44e83598d7fff']

Unnamed: 0,userid,category,placeid,local_datetime,latitude,longitude,country_name,state_name,h3_cell
3671,1556,0,157148,2011-01-17 19:25:34+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3672,1556,0,157148,2010-01-26 21:49:50+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3673,1556,0,157148,2010-01-18 18:08:29+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3674,1556,0,157148,2009-12-18 20:36:20+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3675,1556,0,157148,2009-12-17 17:44:37+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3677,1556,0,157148,2009-12-15 21:16:18+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3678,1556,0,157148,2009-12-10 14:36:38+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3679,1556,0,157148,2009-12-02 21:03:38+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
3676,1556,0,157148,2009-12-16 20:24:39+00:00,33.5161,-86.8067,United States,Alabama,8a44e83598d7fff
1103,1556,2,157153,2009-12-02 21:04:53+00:00,33.5159,-86.8068,United States,Alabama,8a44e83598d7fff


In [127]:
# Keep one row per distinct place inside each region, so the crosstab below
# counts places (POIs) per category rather than check-ins.
dados_feature = (
    checkins_filtrados
    .loc[:, ['h3_cell', 'category', 'placeid']]
    .drop_duplicates(subset=['h3_cell', 'placeid'])
)

cross_tab = pd.crosstab(index=dados_feature['h3_cell'], columns=dados_feature['category'])

cross_tab

category,0,1,2,3,4,5,6
h3_cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
8a264902914ffff,0,1,0,0,0,0,0
8a264904a0a7fff,0,0,1,0,0,0,0
8a264904a31ffff,0,0,1,0,0,0,0
8a264904a8cffff,0,0,1,0,0,0,0
8a264904aadffff,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...
8a44eeb6b407fff,1,0,0,0,0,0,0
8a44eeb6b72ffff,1,0,0,0,0,0,0
8a44eeb6bc6ffff,0,0,1,0,0,0,0
8a44eeb6bd5ffff,0,0,0,1,0,0,0


In [130]:
cross_tab.loc['8a44e83598d7fff']

category
0    1
1    0
2    4
3    0
4    0
5    0
6    0
Name: 8a44e83598d7fff, dtype: int64

# PMI deslocado de co-ocorrência de localizações (informação mútua pontual)

In [77]:
# retirado do código do HMRM
def create_location_coocurrency_matrix(checkins, window=5):
    """Positive PMI matrix of region co-occurrence inside a sliding window.

    All check-ins (of every user in ``checkins``) are ordered by
    ``'local_datetime'`` into a single sequence. For each position, the region
    at that position is counted as co-occurring with the regions of up to
    ``window`` check-ins before it and up to ``window`` check-ins after it.
    Each count ``c[i, j]`` is then replaced by
    ``max(log2(max(c[i, j] * total, 1) / (row_sum[i] * col_sum[j])), 0)``.

    Parameters
    ----------
    checkins : pandas.DataFrame
        Needs the columns ``'local_datetime'`` and ``'h3_id'``. ``'h3_id'`` is
        used directly as a row/column index into an n x n matrix with
        ``n = checkins['h3_id'].nunique()``, so the ids are assumed to be the
        integers ``0 .. n-1`` -- TODO confirm against the cell that builds them.
    window : int, default 5
        Number of neighbouring check-ins considered on each side; the default
        is the value that was previously hard-coded.

    Returns
    -------
    scipy.sparse.lil_matrix
        The n x n matrix described above.
    """
    regions = checkins.sort_values(by=["local_datetime"])["h3_id"].tolist()
    number_of_regions = checkins["h3_id"].nunique()
    total_checkins = len(regions)

    # Kept sparse to save memory (as noted in the original code).
    location_co_ocurrency = sparse.lil_matrix(
        (number_of_regions, number_of_regions)
    )

    for i, region in enumerate(regions):
        # Neighbours i-window .. i+window, clipped to the sequence, skipping i itself.
        first = max(0, i - window)
        last = min(total_checkins, i + window + 1)
        for j in range(first, last):
            if j != i:
                location_co_ocurrency[region, regions[j]] += 1

    sum_of_dl = np.sum(location_co_ocurrency)
    l_occurrency = np.sum(location_co_ocurrency, axis=1).reshape(-1, 1)
    c_occurrency = np.sum(location_co_ocurrency, axis=0).reshape(1, -1)

    # PMI is computed one row at a time so only a single dense row is
    # materialised; the totals above were taken before any row is overwritten.
    for i in range(number_of_regions):
        line = location_co_ocurrency[i].toarray()
        location_co_ocurrency[i] = np.maximum(
            np.log2(
                np.maximum(line * sum_of_dl, 1)
                / (l_occurrency[i] * c_occurrency)
            ),
            0,
        )

    return location_co_ocurrency

In [80]:
# Dense integer id (0 .. n-1, in order of first appearance) for every H3 cell.
# NOTE(review): `h3_regions` was not defined by any earlier cell visible here,
# so it is built explicitly to let this cell run on a fresh kernel -- confirm
# this matches the definition that was used originally.
h3_regions = checkins['h3_cell'].unique()
h3_region_ids = {cell: idx for idx, cell in enumerate(h3_regions)}

# One dictionary lookup per row instead of scanning `h3_regions` with np.where.
checkins['h3_id'] = checkins['h3_cell'].map(h3_region_ids)

# print(create_location_coocurrency_matrix(checkins))

  (0, 62)	7.2845927878879255
  (0, 116)	9.284592787887926
  (0, 167)	7.6996302871667694
  (0, 168)	9.284592787887926
  (0, 606)	2.4019497385260844
  (0, 993)	3.640736598113201
  (0, 1049)	4.03666527444434
  (0, 1085)	4.640736598113201
  (0, 1371)	5.529705285724457
  (0, 1439)	4.477237865830322
  (0, 1440)	5.114667786445613
  (0, 1458)	3.176068331109757
  (0, 1464)	6.477237865830322
  (0, 1620)	4.377702192279408
  (0, 1631)	9.284592787887926
  (0, 1634)	7.6996302871667694
  (0, 1894)	9.284592787887926
  (0, 1900)	8.284592787887926
  (0, 1984)	1.577233655807043
  (0, 1988)	6.114667786445613
  (0, 2001)	4.761030831830913
  (0, 2164)	1.7688929496038834
  (0, 2184)	6.6996302871667694
  (0, 2195)	1.8007770106236693
  (0, 2371)	4.6996302871667694
  (0, 2597)	6.962664693000564
  (0, 2949)	2.7767981476892296
  (0, 3034)	9.284592787887926
  (0, 3035)	9.284592787887926
  (0, 3128)	9.284592787887926
  (0, 3246)	6.6996302871667694
  (0, 3433)	1.6552361678083163
  (0, 3528)	6.2845927878879255
  (0, 

# PMI deslocado de localização e tempo

In [88]:
def create_region_time_matrix(checkins):
    """Positive PMI matrix between regions and hour-of-day slots.

    Check-ins are counted in an n x 48 matrix: columns 0-23 hold the hour of
    check-ins whose ``weekday()`` is below 5, columns 24-47 the hour of those
    with ``weekday() >= 5``. Each count ``c[i, t]`` is then replaced by
    ``max(log2(max(c[i, t] * total, 1) / (row_sum[i] * col_sum[t])), 0)``;
    entries whose row or column total is zero are 0.

    Parameters
    ----------
    checkins : pandas.DataFrame
        Needs the columns ``'h3_id'`` and ``'local_datetime'``. ``'h3_id'`` is
        used directly as a row index with ``n = checkins['h3_id'].nunique()``,
        so the ids are assumed to be the integers ``0 .. n-1`` -- TODO confirm.
        ``'local_datetime'`` values must offer ``.weekday()`` and ``.hour``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 48)`` with non-negative floats.
    """
    regions = checkins["h3_id"].tolist()
    datetimes = checkins["local_datetime"].tolist()
    number_of_regions = checkins["h3_id"].nunique()

    Dt = np.zeros((number_of_regions, 48))
    for region, moment in zip(regions, datetimes):
        slot = moment.hour + 24 if moment.weekday() >= 5 else moment.hour
        Dt[region, slot] += 1

    sum_of_dt = np.sum(Dt)
    l_occurrency = np.sum(Dt, axis=1).reshape(-1, 1)
    c_occurrency = np.sum(Dt, axis=0).reshape(1, -1)
    mult = l_occurrency * c_occurrency

    # Where a row or column total is zero the ratio is undefined. Leaving it at
    # 1 gives log2(1) = 0, the same final value the previous sentinel logic
    # produced, but without evaluating log2(0) and triggering a RuntimeWarning.
    ratio = np.ones_like(Dt)
    defined = mult > 0
    ratio[defined] = np.maximum(Dt * sum_of_dt, 1)[defined] / mult[defined]

    region_time = np.maximum(np.log2(ratio), 0)

    return region_time