In [1]:
import numpy as np
import pandas as pd
import streamlit as st
from dateutil.relativedelta import relativedelta

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import json
import requests

config_path = 'config.json'
with open(config_path) as f: #Loading configuration file to a dict
    config = json.load(f)

with open(config['data_geo']) as f: # loading geo info from data/geo_init.json
    data_geo = json.load(f)

content_dict = {}
for page in config['data_content'].keys(): # loading page text for each page and saving to a dict
    with open(config['data_content'][page], 'r') as f:
        content_dict[page] = f.read()


#Lets load actual data
period_last = config['period_last']
df_geo = pd.DataFrame.from_dict(data_geo)


In [2]:
df_geo

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
direction,Popasna,Mykolaiv,Kharkiv,Lyman,Novopavlivske,Sievierodonetsk,Izyum,Zaporizhzhia,Kurakhove,Avdiivka,Kramatorsk,Donetsk,Sloviansk,Kryvyi Rih,Bakhmut,Kupiansk
oblast,Luhansk,Mykolaiv,Kharkiv,Donetsk,Mykolaiv,Luhansk,Kharkiv,Zaporizhzhia,Donetsk,Donetsk,Donetsk,Donetsk,Donetsk,Dnipropetrovsk,Donetsk,Kharkiv
coordinate_ua,"[38.3777033, 48.6322687]","[31.9939666, 46.9758615]","[36.2310146, 49.9923181]","[37.8168012, 48.9801314]","[32.9033029, 47.1353188]","[38.4936475, 48.9478698]","[37.2784125, 49.1913721]","[35.1182867, 47.8507859]","[37.2826414, 47.9835214]","[37.7466719, 48.1338824]","[37.5843812, 48.7389415]","[37.8013407, 48.0158753]","[37.6058241, 48.8522691]","[33.3917703, 47.9102734]","[38.0020994, 48.5894123]","[49.7132963, 37.6141992]"


In [3]:
config['url']

{'personnel': 'https://raw.githubusercontent.com/PetroIvaniuk/2022-Ukraine-Russia-War-Dataset/main/data/russia_losses_personnel.json',
 'equipment': 'https://raw.githubusercontent.com/PetroIvaniuk/2022-Ukraine-Russia-War-Dataset/main/data/russia_losses_equipment.json',
 'oryx': 'https://raw.githubusercontent.com/PetroIvaniuk/2022-Ukraine-Russia-War-Dataset/main/data/russia_losses_equipment_oryx.json'}

In [71]:
df_oryx = pd.DataFrame(requests.get(config['url']['oryx']).json())
df_personnel = pd.DataFrame(requests.get(config['url']['personnel']).json())
df_equipment = pd.DataFrame(requests.get(config['url']['equipment']).json())

In [72]:
df_personnel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 818 entries, 0 to 817
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        818 non-null    object 
 1   day         818 non-null    int64  
 2   personnel   818 non-null    int64  
 3   personnel*  818 non-null    object 
 4   POW         62 non-null     float64
dtypes: float64(1), int64(2), object(2)
memory usage: 32.1+ KB


In [73]:
df_personnel['date'] = pd.to_datetime(df_personnel['date'],format='%Y-%m-%d')
df_personnel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 818 entries, 0 to 817
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        818 non-null    datetime64[ns]
 1   day         818 non-null    int64         
 2   personnel   818 non-null    int64         
 3   personnel*  818 non-null    object        
 4   POW         62 non-null     float64       
dtypes: datetime64[ns](1), float64(1), int64(2), object(1)
memory usage: 32.1+ KB


In [78]:
df_personnel['POW']= df_personnel['POW'].fillna(0)
df_personnel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 818 entries, 0 to 817
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        818 non-null    datetime64[ns]
 1   day         818 non-null    int64         
 2   personnel   818 non-null    int64         
 3   personnel*  818 non-null    object        
 4   POW         818 non-null    float64       
dtypes: datetime64[ns](1), float64(1), int64(2), object(1)
memory usage: 32.1+ KB


In [96]:
def convert2daily(df:pd.DataFrame,cols:str):
    '''
    :param df: Data Frame I want to convert columns to daily
    :param cols: Columns with Cummulative Sum that I wawnt to change to daily 
    :return: DataFrame with daily losses
    '''
    new_col = cols+'_daily'
    df[new_col] = df[cols].diff()
    df.loc[0,new_col]= df.loc[0,cols]
    df[new_col].fillna(0)
    return df
    

In [101]:
def line_plot(df,col,slider:bool):
    fig = px.bar(df, x='date', y=col)
    if slider:
        fig.update_xaxes(
            rangeslider_autorange=True,
            rangeselector_visible=True,
            rangeselector=dict(
                buttons=list([
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all")
                ])
            )
        )
    return fig

In [102]:
fig= line_plot(df_personnel,'personnel_daily',slider=True)
fig.show()

In [153]:
monthly_counts = df_personnel.groupby([df_personnel['date'].dt.year, df_personnel['date'].dt.month])['personnel_daily'].sum()

In [155]:
test_df = pd.DataFrame({
    'Year': monthly_counts.index.get_level_values(0),
    'Month': monthly_counts.index.get_level_values(1),
    'count': monthly_counts.values
})

In [187]:
test_df['Month'] = pd.to_datetime(test_df['Month'], format='%m')
test_df['Month'] = test_df['Month'].dt.strftime('%B')

test_df

Unnamed: 0,Year,Month,count
0,2022,February,5300.0
1,2022,March,12200.0
2,2022,April,5700.0
3,2022,May,7300.0
4,2022,June,5100.0
5,2022,July,5230.0
6,2022,August,7070.0
7,2022,September,11180.0
8,2022,October,12740.0
9,2022,November,17060.0


In [213]:
fig = px.treemap(
    test_df,
    path=[px.Constant('All'),'Year','Month'],
    values='count',
)


fig.update_traces(
                  marker=dict(cornerradius=5),
                  marker_colorscale = 'darkmint')

fig.show()

In [211]:
df_equipment.day.iloc[-1]


819

In [230]:
monthly=test_df.groupby('Month')['count'].sum().reset_index(name='count')
fig = px.bar(data_frame=monthly,x='Month',y='count')
fig.update_traces(marker_colorscale = 'sunsetdark')

fig.show()


In [220]:
monthly

Month
April        49400.0
August       23900.0
December     47550.0
February     55570.0
January      47540.0
July         23080.0
June         25530.0
March        64730.0
May          52350.0
November     45010.0
October      35420.0
September    26290.0
Name: count, dtype: float64