In [1]:
import pandas as pd
import numpy as np
from collections import Counter

import plotly.graph_objects as go 

In [2]:
# Read the processed users table that the rest of the notebook works from.
users_path = '../data/processed/users.parquet.gzip'
users = pd.read_parquet(users_path)

In [3]:
# Tally how often each country occurs, ignoring rows where the country is missing.
has_country = users.country.notna()
c = Counter(users.loc[has_country, 'country'].values)

# Keep only the ten most frequent countries (ordered from most to least common).
top_countries = dict(c.most_common(10))
k, v = top_countries.keys(), top_countries.values()

# Reshape into two parallel lists: country names and their counts.
countries = dict(countries=list(k), values=list(v))

In [4]:
# Palette for the bars; it holds more colours than bars, so it is trimmed below.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#ffc300', '#bcbd22', '#17becf',
          '#339966', '#ff6686', '#f5b8ec', '#bb77dd', '#77ddca']

# Font settings shared by both axes so they cannot drift apart.
axis_title_font = dict(size=22)
axis_tick_font = dict(size=20)

# Bar chart of the most frequent countries: one bar per country,
# bar height = number of rows in `users` with that country.
fig = go.Figure(data=[go.Bar(
    x=countries['countries'],
    y=countries['values'],
    marker_color=colors[:len(countries['countries'])],  # exactly one colour per bar
    marker_opacity=0.7,
    width=0.75,
)])
fig.update_layout(
    title=dict(
        text="Dataset's national composition",
        font=dict(size=30),
    ),
    height=1000,
    plot_bgcolor='rgb(255,255,255)',  # white plot background
    xaxis=dict(
        title=dict(text="Country", font=axis_title_font),
        tickfont=axis_tick_font,
    ),
    yaxis=dict(
        title=dict(text="People", font=axis_title_font),
        tickfont=axis_tick_font,
        # Logarithmic axis with hand-picked tick positions.
        type="log",
        tickmode="array",
        # Every tick must be strictly positive: 0 has no position on a log axis.
        tickvals=[100, 400, 1000, 2500, 5000, 10000, 40000],
    ),
)
fig.show()

In [6]:
# City of every user that has one recorded (missing values dropped), as a plain array.
cities_all = users.city.dropna().values

In [7]:
# Show the array of city values as the cell output (long arrays are abbreviated in the repr).
cities_all

array(['Санкт-Петербург', 'Москва', 'Hong Kong', ..., 'Казань', 'Москва',
       'Ереван'], dtype=object)

In [19]:
# Frequency of each city among users with a known city; keep the 14 most common.
c = Counter(cities_all)
cities_data = dict(c.most_common(14))

# English label -> (latitude, longitude) of each plotted city.
# NOTE(review): markers are paired with `cities_data` purely by position, so this
# dict must list the cities in the same order as the top-14 ranking above.
# TODO confirm after every data refresh; the hover text shows both the English
# label and the dataset's own city name so that a mis-pairing is easy to spot.
city_locations = {
    'Moscow': (55.7558, 37.6173),
    'Saint-Petersburg': (59.9392, 30.3141),
    'Ekaterinburg': (56.8389, 60.5972),
    'Novosibirsk': (55.0167, 82.9333),
    'Krasnodar': (45.0354, 38.9720),
    'Chelyabinsk': (55.1540, 61.4260),
    'Rostov-on-Don': (47.2314, 39.7247),
    'Nizhny Novgorod': (56.3269, 44.0063),
    'Krasnoyarsk': (56.0183, 92.8667),
    'Samara': (53.1958, 50.1521),
    'Perm': (58.0102, 56.2544),
    'Voronezh': (51.6719, 39.2002),
    'Omsk': (55.0041, 73.4208),
    'Kazan': (55.7922, 49.1054),
}

# One marker colour per city, in the same order as `city_locations`.
colors = [
    '#ff0000',  # Red
    '#ff9900',  # Orange
    '#ffff00',  # Yellow
    '#00ff00',  # Green
    '#00ffff',  # Cyan
    '#0000ff',  # Blue
    '#9900ff',  # Violet
    '#ff00ff',  # Magenta
    '#ff9966',  # Salmon
    '#9966ff',  # Blue violet
    '#66ff99',  # Spring green
    '#ffcc99',  # Peach
    '#ff9999',  # Rose
    '#ff6600',  # Vermillion
]

# Positional pairing only makes sense when both sides have the same length.
if len(cities_data) != len(city_locations):
    raise ValueError(
        f"Expected {len(city_locations)} cities to plot, "
        f"but the data yielded {len(cities_data)}"
    )

# Marker size passed to plotly is count / marker_scale.
marker_scale = 40

# Build the coordinate and count arrays once instead of once per use.
coords = np.array(list(city_locations.values()))
counts = np.array(list(cities_data.values()))

# Hover text: English label, the dataset's own city name, and the user count.
hover_labels = [
    f"{label} ({dataset_name}): {count}"
    for label, (dataset_name, count) in zip(city_locations, cities_data.items())
]

# One semi-transparent marker per city, sized by its user count.
fig = go.Figure()
fig.add_trace(go.Scattermapbox(
    lat=coords[:, 0],
    lon=coords[:, 1],
    mode='markers',  # no per-marker text is supplied, so only markers are drawn
    marker=go.scattermapbox.Marker(
        size=counts / marker_scale,
        color=colors[:len(coords)],
        opacity=0.4,
    ),
    hovertext=hover_labels,
))

# Map layout: OpenStreetMap tiles with a fixed centre and zoom level.
fig.update_layout(
    title='Number of people by cities in Russia',
    width=1600,
    height=800,
    mapbox_style="open-street-map",
    mapbox_zoom=3.9,
    mapbox_center={"lat": 55, "lon": 62},
)

fig.show()