# Femicides
##### Data analysis and Visualization by iMEdD Lab/ Thanasis Troboukis
##### Project leader: MIIR

In [1]:
# Import libraries
import pandas as pd
import re
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns
import math
import itertools
from functools import reduce

In [2]:
# set options for printing of dataframes
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', 1500)

In [3]:
# function that cleans rows in the google spreadsheet cells.
def clean(item):
    '''
    function that cleans rows in the google spreadsheet cells.
    applied with lambda function

    For string cells it
    - removes the characters ':' ',' and '?' (e.g. '1,234' becomes '1234')
    - replaces the long label of Germany with 'Germany' and
      'Czech Republic' with 'Czechia'
    - returns np.nan when the cleaned text starts with 'N/A'
    Cells that are not strings (numbers, NaN, None) are returned unchanged.
    '''
    # Only text can carry the markers handled below. Checking the type up front
    # means no exception handling is needed for numeric or empty cells.
    if not isinstance(item, str):
        return item
    item = item.replace(':', '').replace(',', '').replace('?', '')
    item = item.replace('Germany (until 1990 former territory of the FRG)', 'Germany').replace('Czech Republic', 'Czechia')
    if item.startswith('N/A'):
        return np.nan
    return item

#function to read csv's
def _read_one(path):
    '''Load one csv, clean every cell with clean() and turn columns into numbers where possible.'''
    frame = pd.read_csv(path)
    # clean strings from dataset
    frame = frame.applymap(clean)
    # convert strings to numbers; a column that cannot be parsed is kept as it is
    # NOTE(review): `applymap` and `errors='ignore'` are deprecated in recent pandas
    # releases. They are kept so results do not change - revisit when upgrading.
    return frame.apply(lambda column: pd.to_numeric(column, errors='ignore'))


def read(paths):
    '''
    function to read csv's (if it's a list of paths, it returns a list of dataframes)
    it also cleans the data from strings and replaces Germany's country name.

    paths -> a single path/url, or a list (or tuple) of them
    Returns one dataframe for a single path, or a list of dataframes in the
    same order as the given paths.
    '''
    if isinstance(paths, (list, tuple)):
        return [_read_one(path) for path in paths]
    return _read_one(paths)

# Load datasets
URL names reflect the tabs in the [google spreadsheet](https://docs.google.com/spreadsheets/d/1VG3laHmIt-WbKLfVmC8FJpm_m6drp1RXuJFScCK61TQ/edit?usp=sharing).

### 1 Save urls & Columns to be renamed*

In [4]:
#url dataset 1.1
one_1 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQbjKuY8nj2OVl-fvtxLaEWs2ubMPzZaaFIA4_8k6Nh5Wi1nYmKh7QKhlPWpnKLGw1EGu2LhflRKwqM/pub?gid=1746378177&single=true&output=csv'

In [5]:
#url dataset 1.2 - EIGE indicators
one_2 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQbjKuY8nj2OVl-fvtxLaEWs2ubMPzZaaFIA4_8k6Nh5Wi1nYmKh7QKhlPWpnKLGw1EGu2LhflRKwqM/pub?gid=822247758&single=true&output=csv'

In [6]:
#columns of 1.2 to be renamed
cols = {"YEAR": "Year", "indicator 1 - Annual number of women (aged 18 and over) victims of intimate partner violence committed by men (aged 18 and over), as recorded by police\nIPV victims (f) ":"vic_intimate_violence", "indicator 2 - Annual number of REPORTED ofences related to intimate partner violence against women committed by men (aged 18 and over)\nIPV reported ofences (f)":"reported_offences_int_violence", 'indicator 3 - 3 - Annual number of men (aged 18 and over) perpetrators of intimate partner violence against women (and percentage of male population that are perpetrators)\nIPV perpetrators (m)':'men_perpetrators_int_violence', 'indicator 4 - Annual number of women (aged 18 and over) victims of physical intimate partner violence committed by men (aged 18 and over), as recorded by police\nIPV victims (f) – physical':'physical_violence',"indicator 5 - Annual number of women (aged 18 and over) victims of psychological intimate partner violence committed by men (aged 18 and over), as recorded by police\nIPV victims (f) – psychological":"psychological_violence","indicator 6 - Annual number of women (aged 18 and over) victims of sexual intimate partner violence committed by men (aged 18 and over), as recorded by police\nIPV victims (f) – sexual":"sexual_violence","indicator 7 - Annual number of women (aged 18 and over) victims of economic intimate partner violence committed by men (aged 18 and over), as recorded by police\nIPV victims (f) – economic":"economic_violence","indicator 8 - Annual number of women (aged 18 and over) victims reporting rape committed by men (aged 18 and over), as recorded by police\nRape victims (f)":"rape_victims",'Indicator 9 - Women victims of intimate partner femicide (aged 18 and over) committed by a male intimate partner (aged 18 and over) - number as part of total homicides':'femicides','9Β - Women victims of homicide total (aged 18 and over) ':'homicide_total','indicator 10 - Annual number of protection orders applied for and 
granted in cases of intimate partner violence against women - IPV protection orders':'protection_orders','indicator 11 - Annual number of men (aged 18 and over) prosecuted for intimate partner violence against women\nIPV prosecuted (m)':'men_prosecuted_for_int_violence','indicator 12 - Annual number of men (aged 18 and over) sentenced for intimate partner violence against women\nIPV sentenced (m)':'men_sentenced','indicator 13 - Annual number of men (aged 18 and over) sentenced for intimate partner violence against women held in prison or with a sanction involving a form of deprivation of liberty\nIPV held in prison (m)':'men_held_in_prison'}

In [7]:
# 1.3 1.3 Victim-Offender Relationship (Eurostat)
one_3 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQbjKuY8nj2OVl-fvtxLaEWs2ubMPzZaaFIA4_8k6Nh5Wi1nYmKh7QKhlPWpnKLGw1EGu2LhflRKwqM/pub?gid=1910359684&single=true&output=csv'

In [8]:
# 1.4 1.4 Victims-Fem -Intent homicides-sexual offences (Eurostat)
one_4 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQbjKuY8nj2OVl-fvtxLaEWs2ubMPzZaaFIA4_8k6Nh5Wi1nYmKh7QKhlPWpnKLGw1EGu2LhflRKwqM/pub?gid=1569193583&single=true&output=csv'

In [9]:
# 2.0 Suspected-Prosecuted-Convicted Males (EUROSTAT)
two = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQbjKuY8nj2OVl-fvtxLaEWs2ubMPzZaaFIA4_8k6Nh5Wi1nYmKh7QKhlPWpnKLGw1EGu2LhflRKwqM/pub?gid=200532598&single=true&output=csv'

In [10]:
#path of female population EUROSTAT
female_pop = 'https://raw.githubusercontent.com/troboukis/FEMICIDES/main/female_pop.csv'

### 3 Load datasets
#### list of all datasets
1. <b>df</b> -> 1.1 *we'll merge everything to this dataset*
2. <b>indicators</b> -> 1.2
3. <b>vo</b> -> 1.3
4. <b>rape_vic</b> -> 1.4
5. <b>fpop</b> -> female population (github link)
6. <b>dfp</b> -> 2.0 (suspected - prosecuted - convicted males)

In [11]:
# Read datasets
df = read(one_1)

In [12]:
indicators = read(one_2)
#rename columns
indicators.rename(columns=cols,inplace = True)
vo = read(one_3)
rape_vic=read(one_4)
dfp = read(two)


In [13]:
# Female population per country and year, loaded from the CSV at `female_pop`.
fpop = read(female_pop)
# Rename the source columns to the names used in the master dataframe.
fpop.rename(columns={'OBS_VALUE':'female_population','geo':'country_code',"TIME_PERIOD":"Year" },inplace=True)
# Collapse to one row per (country_code, Year) by summing the population values.
# NOTE(review): presumably the file holds several rows per country/year
# (e.g. age groups) - confirm that summing them gives the total female population.
fpop = fpop.groupby(['country_code','Year']).female_population.sum().reset_index()

In [14]:
# Columns that we'll drop
drop_columns=['Collected by', 'Source', 'Unnamed: 10','Link','NOTES','pct of femicides to female homicide victims']

### 4 Merge Datasets
We'll merge the following dataframes into df:
- indicators
- fpop

In [15]:
# Left join: every row of df is kept and female_population is attached where
# fpop has a match. No `on=` is given, so pandas joins on all column names the
# two frames have in common (presumably country_code and Year - confirm).
df = pd.merge(df, fpop, how='left')

# Outer join on (Country, Year): pairs that exist only in the indicators sheet
# are added as new rows, so columns coming from df can be empty on those rows.
df = pd.merge(df, indicators, how='outer', on=['Country','Year'])

# correct the country code of Greece that is missing
# (only rows where country_code is still NaN are filled with 'EL')
df.loc[(df.Country=='Greece')&(df.country_code.isna()),'country_code']='EL'

#### 4a. Changing the 1.3, 1.4  and 2.0 format of the data from long to wide and then we'll merge it with df

In [16]:
# ''' Victim offender relationship'''
# We filter our data to show only intimate partners and family, only numbers (not per 1k) and only female victims
# Keep female victims, absolute counts and the individual relationship types
# (the 'Total' rows are dropped), sum per country/year/relationship and spread
# the relationship types into one column each.
vo_long = (
    vo.loc[
        (vo['Victim_sex'] == 'Females')
        & (vo['Unit'] == 'Number')
        & (vo['Relationship_type'] != 'Total')
    ]
    .groupby(['Country', 'Year', 'Relationship_type'])['Value']
    .sum()
    .unstack()
    .rename_axis(None, axis=1)
    .reset_index()
)

# NOTE(review): the rename below is positional - it relies on the unstacked frame
# having exactly two relationship columns, family first and partner second. Confirm
# against the labels used in the sheet.
vo_long.columns = ['Country','Year', 'intentional_family', 'intentional_partner']
# Combined figure; it is NaN whenever one of the two parts is missing.
vo_long['Int_homicide_by_family_and_partner'] = vo_long['intentional_family'].add(vo_long['intentional_partner'])

# vo[(vo['Relationship_type']!='Total') \
#              & (vo['Unit']=='Number')\
#              & (vo['Victim_sex']=='Females')]\
# .groupby(['Country','Year'])['Value'].sum().reset_index()\
# .rename(columns = {'Value':'Int_homicide_by_family_and_partner'}).replace(0, np.nan)


In [17]:
# merging the victim offender data to our master dataframe (df)
df = pd.merge(df, vo_long, how='outer', on=['Country','Year'])

In [18]:
# changing from long to wide 1.4 (rape-sex assault victims)
# Female victims, absolute counts, every crime definition except intentional
# homicide; summed per country/year and spread into one column per crime.
dfrv = (
    rape_vic.loc[
        (rape_vic['SEX'] == 'Females')
        & (rape_vic['UNIT'] == 'Number')
        & (rape_vic['STATUS'] == 'Victim')
        & (rape_vic['ICCS - CRIME DEFINITION'] != 'Intentional homicide')
    ]
    .groupby(['Country', 'Year', 'ICCS - CRIME DEFINITION'])['Value']
    .sum()
    .unstack()
    .rename_axis(None, axis=1)
    .reset_index()
)

# NOTE(review): positional rename - relies on exactly two crime columns remaining,
# rape first and sexual assault second. Confirm against the sheet.
columns = ['Country','Year','female_rape_victims','female_sex_assault_victims']
dfrv.columns = columns

In [19]:
# merging dfrv to master df
df = pd.merge(df, dfrv, how='outer', on=['Country','Year'])
df.head()

Unnamed: 0,country_code,Country,Year,Population,Deaths,Female deaths,Female assaults deaths,Intentional female homicides,Collected by_x,Source_x,Unnamed: 10,female_population,vic_intimate_violence,reported_offences_int_violence,men_perpetrators_int_violence,physical_violence,psychological_violence,sexual_violence,economic_violence,rape_victims,femicides,homicide_total,pct of femicides to female homicide victims,protection_orders,men_prosecuted_for_int_violence,men_sentenced,men_held_in_prison,Collected by_y,Source_y,Link,NOTES,intentional_family,intentional_partner,Int_homicide_by_family_and_partner,female_rape_victims,female_sex_assault_victims
0,AT,Austria,2011,8375164.0,76142.0,39758.0,21.0,35.0,MIIR,Eurostat,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1131.0,743.0
1,AT,Austria,2012,8408121.0,78961.0,41803.0,21.0,38.0,MIIR,Eurostat,,4309977.0,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1117.0,780.0
2,AT,Austria,2013,8451860.0,79020.0,41378.0,23.0,30.0,MIIR,Eurostat,,4328238.0,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1146.0,660.0
3,AT,Austria,2014,8507786.0,77940.0,40662.0,17.0,23.0,MIIR,Eurostat,,4352447.0,10061.0,9904.0,,5785.0,4049.0,221.0,6.0,485.0,25.0,38.0,66%,,,,,MIIR,EIGE Report 2021 EIGE’s indicators on intimate...,https//eige.europa.eu/publications/eiges-indic...,,0.0,0.0,0.0,1061.0,696.0
4,AT,Austria,2015,8584926.0,83026.0,43159.0,31.0,17.0,MIIR,Eurostat,,4384529.0,10529.0,10297.0,,6172.0,4150.0,196.0,11.0,452.0,32.0,46.0,70%,,3648.0,433.0,,MIIR,EIGE Report 2021 EIGE’s indicators on intimate...,https//eige.europa.eu/publications/eiges-indic...,,14.0,0.0,14.0,1045.0,688.0


In [20]:
# changing the format of our perpetrators dataset in order to merge it with our master dataset
# Step 1: male perpetrators, every crime except intentional homicide; 'Value' is summed per
# (Country, Year, legal status, crime) and the crime is unstacked into one column per crime.
# NOTE(review): unlike the victims block (which keeps only UNIT == 'Number') no unit filter
# is applied here. The resulting values are fractional (e.g. 189.54 convictions), which
# suggests counts and rates are being added together. Confirm the name of the unit
# column in this sheet and filter to absolute numbers before summing.
dfp_long = dfp[(dfp.SEX=='Males')&(dfp['ICSS CRIME']!='Intentional homicide')]\
.groupby(['Country','Year','LEGAL STATUS OF PERPETRATOR','ICSS CRIME'])['Value']\
.sum().unstack().rename_axis(None, axis=1).reset_index()

# turning it to wide
# Step 2: the legal status is pivoted into columns as well, which gives one column per
# (crime, legal status) combination for every (Year, Country) row.
dfp_wide = dfp_long.pivot(index=['Year','Country'],columns=['LEGAL STATUS OF PERPETRATOR']).reset_index()

# Flat names for the pivoted columns, applied by position.
# NOTE(review): relies on the pivot producing exactly these six combinations in this
# order (rape before sexual assault; convicted, prosecuted, suspected) - confirm.
columns=['Year','Country','rape_convicted','rape_prosecuted','rape_suspected',
        'sex_assault_convicted','sex_assault_prosecuted','sex_assault_suspected']

# changing the column names
dfp_wide.columns=columns

In [21]:
df = pd.merge(df, dfp_wide, how='outer', on=['Country','Year'])

In [22]:
# reorder columns
df = df[['country_code',
 'Country',
 'Year',
 'Population',
 'female_population',
 'Deaths',
 'Female deaths',
 'Intentional female homicides',
 'intentional_family', 
 'intentional_partner',
 'Int_homicide_by_family_and_partner',
 'femicides',
 'homicide_total',
 'vic_intimate_violence',
 'reported_offences_int_violence',
 'men_perpetrators_int_violence',
 'physical_violence',
 'psychological_violence',
 'sexual_violence',
 'economic_violence',
 'rape_victims',
 'protection_orders',
 'men_prosecuted_for_int_violence',
 'men_sentenced',
 'men_held_in_prison',
 'female_rape_victims',
 'female_sex_assault_victims',
 'rape_convicted',
 'rape_prosecuted',
 'rape_suspected',
 'sex_assault_convicted',
 'sex_assault_prosecuted',
 'sex_assault_suspected']].copy()

In [23]:
df.head()

Unnamed: 0,country_code,Country,Year,Population,female_population,Deaths,Female deaths,Intentional female homicides,intentional_family,intentional_partner,Int_homicide_by_family_and_partner,femicides,homicide_total,vic_intimate_violence,reported_offences_int_violence,men_perpetrators_int_violence,physical_violence,psychological_violence,sexual_violence,economic_violence,rape_victims,protection_orders,men_prosecuted_for_int_violence,men_sentenced,men_held_in_prison,female_rape_victims,female_sex_assault_victims,rape_convicted,rape_prosecuted,rape_suspected,sex_assault_convicted,sex_assault_prosecuted,sex_assault_suspected
0,AT,Austria,2011,8375164.0,,76142.0,39758.0,35.0,0.0,0.0,0.0,,,,,,,,,,,,,,,1131.0,743.0,189.54,1726.31,1110.58,223.34,2782.59,1911.75
1,AT,Austria,2012,8408121.0,4309977.0,78961.0,41803.0,38.0,0.0,0.0,0.0,,,,,,,,,,,,,,,1117.0,780.0,174.15,1740.46,1114.55,218.2,2713.64,1995.53
2,AT,Austria,2013,8451860.0,4328238.0,79020.0,41378.0,30.0,0.0,0.0,0.0,,,,,,,,,,,,,,,1146.0,660.0,194.61,1658.26,1124.63,111.64,2565.75,1541.5
3,AT,Austria,2014,8507786.0,4352447.0,77940.0,40662.0,23.0,0.0,0.0,0.0,25.0,38.0,10061.0,9904.0,,5785.0,4049.0,221.0,6.0,485.0,,,,,1061.0,696.0,189.45,1624.17,1110.09,122.89,2479.26,1581.15
4,AT,Austria,2015,8584926.0,4384529.0,83026.0,43159.0,17.0,14.0,0.0,14.0,32.0,46.0,10529.0,10297.0,,6172.0,4150.0,196.0,11.0,452.0,,3648.0,433.0,,1045.0,688.0,179.17,1603.33,1026.91,125.93,2508.4,1516.3


# Statistical calculations
We'll do two main calculations:
- percentage change (using the pandas .pct_change() function with fill_method=None)
- normalization per 100k of female population *(value / population) multiplied by 100000 and rounded to one decimal*

In [24]:
# Function to calculate pct_change from year to year to a series of columns
# new columns will have pct_change in the column name
def pct_cng(data, column_names, group_by='Country'):
    '''
    Function to calculate pct_change from year to year to a series of columns
    new columns will have pct_change in the column name.
    data -> dataframe (rows of each group must already be in chronological order)
    column_names -> list with the columns to be analyzed
    group_by -> name of the column that identifies independent series
                (default 'Country'). The change is computed inside each group,
                so the first row of a group is NaN instead of being compared
                with the last row of the group stored above it. If the column
                does not exist in data (or group_by is None) the change is
                computed over the whole column, row after row.
    Values are percentages rounded to one decimal; missing values are not
    forward-filled (fill_method=None).
    SOS- IT MODIFIES data IN PLACE AND RETURNS IT
    '''
    use_groups = group_by is not None and group_by in data.columns
    for item in column_names:
        # sort=False: only group membership matters, the row order is untouched
        series = data.groupby(group_by, sort=False)[item] if use_groups else data[item]
        data[item+'_pct_change'] = (series.pct_change(fill_method=None)*100).round(1)
    return data

In [25]:
# Main function for the analysis - per 100k
def analysis_per_X_women(data, columns, female_population, measurement):
    '''
    Normalise one or more columns by a population column.

    data = dataframe (new columns are written into it)
    columns = list of column names, or a single column name
    female_population = name of the column to divide by (e.g. female population or female deaths)
    measurement = size of the reference population, e.g. 100000 for "per 100k"

    For every column X a new column "X_per_<measurement>" is added holding
    (X / population) * measurement rounded to one decimal.
    SOS- IT RETURNS THE NEW DATAFRAME
    '''
    # A single column name is treated exactly like a one-element list.
    targets = columns if type(columns) is list else [columns]
    suffix = '_per_' + str(measurement)
    for name in targets:
        rate = data[name] / data[female_population]
        data[name + suffix] = (rate * measurement).round(1)
    return data

### Running the function of percentage change

In [26]:
# The columns we'll feed the functions
columns_to_analyze_pct_change = ['Intentional female homicides', 'intentional_family', 'intentional_partner','Int_homicide_by_family_and_partner', 'femicides', 'homicide_total', 'vic_intimate_violence', 'reported_offences_int_violence', 'men_perpetrators_int_violence', 'physical_violence', 'psychological_violence', 'sexual_violence', 'economic_violence', 'rape_victims', 'protection_orders', 'men_prosecuted_for_int_violence', 'men_sentenced', 'men_held_in_prison', 'rape_convicted', 'rape_prosecuted', 'rape_suspected', 'sex_assault_convicted', 'sex_assault_prosecuted', 'sex_assault_suspected','female_rape_victims','female_sex_assault_victims']

In [27]:
df = pct_cng(df, columns_to_analyze_pct_change)

### Running the function of per 100k

In [28]:
# The columns we'll feed the function - for female population
columns_to_analyze_per_100k_female_pop = ['Intentional female homicides','intentional_family', 'intentional_partner', 'Int_homicide_by_family_and_partner', 'femicides', 'homicide_total', 'vic_intimate_violence', 'reported_offences_int_violence', 'physical_violence', 'psychological_violence', 'sexual_violence', 'economic_violence', 'rape_victims','female_rape_victims','female_sex_assault_victims']
columns_to_analyze_per_100k_male_pop = ['men_perpetrators_int_violence','protection_orders', 'men_prosecuted_for_int_violence', 'men_sentenced', 'men_held_in_prison', 'rape_convicted', 'rape_prosecuted', 'rape_suspected', 'sex_assault_convicted', 'sex_assault_prosecuted', 'sex_assault_suspected']

In [29]:
df['male_population'] = df.Population-df.female_population

In [30]:
df = analysis_per_X_women(df, columns_to_analyze_per_100k_female_pop, 'female_population', 100000)

In [31]:
df = analysis_per_X_women(df, columns_to_analyze_per_100k_male_pop, 'male_population', 100000)

In [32]:
df.head()

Unnamed: 0,country_code,Country,Year,Population,female_population,Deaths,Female deaths,Intentional female homicides,intentional_family,intentional_partner,Int_homicide_by_family_and_partner,femicides,homicide_total,vic_intimate_violence,reported_offences_int_violence,men_perpetrators_int_violence,physical_violence,psychological_violence,sexual_violence,economic_violence,rape_victims,protection_orders,men_prosecuted_for_int_violence,men_sentenced,men_held_in_prison,female_rape_victims,female_sex_assault_victims,rape_convicted,rape_prosecuted,rape_suspected,sex_assault_convicted,sex_assault_prosecuted,sex_assault_suspected,Intentional female homicides_pct_change,intentional_family_pct_change,intentional_partner_pct_change,Int_homicide_by_family_and_partner_pct_change,femicides_pct_change,homicide_total_pct_change,vic_intimate_violence_pct_change,reported_offences_int_violence_pct_change,men_perpetrators_int_violence_pct_change,physical_violence_pct_change,psychological_violence_pct_change,sexual_violence_pct_change,economic_violence_pct_change,rape_victims_pct_change,protection_orders_pct_change,men_prosecuted_for_int_violence_pct_change,men_sentenced_pct_change,men_held_in_prison_pct_change,rape_convicted_pct_change,rape_prosecuted_pct_change,rape_suspected_pct_change,sex_assault_convicted_pct_change,sex_assault_prosecuted_pct_change,sex_assault_suspected_pct_change,female_rape_victims_pct_change,female_sex_assault_victims_pct_change,male_population,Intentional female 
homicides_per_100000,intentional_family_per_100000,intentional_partner_per_100000,Int_homicide_by_family_and_partner_per_100000,femicides_per_100000,homicide_total_per_100000,vic_intimate_violence_per_100000,reported_offences_int_violence_per_100000,physical_violence_per_100000,psychological_violence_per_100000,sexual_violence_per_100000,economic_violence_per_100000,rape_victims_per_100000,female_rape_victims_per_100000,female_sex_assault_victims_per_100000,men_perpetrators_int_violence_per_100000,protection_orders_per_100000,men_prosecuted_for_int_violence_per_100000,men_sentenced_per_100000,men_held_in_prison_per_100000,rape_convicted_per_100000,rape_prosecuted_per_100000,rape_suspected_per_100000,sex_assault_convicted_per_100000,sex_assault_prosecuted_per_100000,sex_assault_suspected_per_100000
0,AT,Austria,2011,8375164.0,,76142.0,39758.0,35.0,0.0,0.0,0.0,,,,,,,,,,,,,,,1131.0,743.0,189.54,1726.31,1110.58,223.34,2782.59,1911.75,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,AT,Austria,2012,8408121.0,4309977.0,78961.0,41803.0,38.0,0.0,0.0,0.0,,,,,,,,,,,,,,,1117.0,780.0,174.15,1740.46,1114.55,218.2,2713.64,1995.53,8.6,,,,,,,,,,,,,,,,,,-8.1,0.8,0.4,-2.3,-2.5,4.4,-1.2,5.0,4098144.0,0.9,0.0,0.0,0.0,,,,,,,,,,25.9,18.1,,,,,,4.2,42.5,27.2,5.3,66.2,48.7
2,AT,Austria,2013,8451860.0,4328238.0,79020.0,41378.0,30.0,0.0,0.0,0.0,,,,,,,,,,,,,,,1146.0,660.0,194.61,1658.26,1124.63,111.64,2565.75,1541.5,-21.1,,,,,,,,,,,,,,,,,,11.7,-4.7,0.9,-48.8,-5.4,-22.8,2.6,-15.4,4123622.0,0.7,0.0,0.0,0.0,,,,,,,,,,26.5,15.2,,,,,,4.7,40.2,27.3,2.7,62.2,37.4
3,AT,Austria,2014,8507786.0,4352447.0,77940.0,40662.0,23.0,0.0,0.0,0.0,25.0,38.0,10061.0,9904.0,,5785.0,4049.0,221.0,6.0,485.0,,,,,1061.0,696.0,189.45,1624.17,1110.09,122.89,2479.26,1581.15,-23.3,,,,,,,,,,,,,,,,,,-2.7,-2.1,-1.3,10.1,-3.4,2.6,-7.4,5.5,4155339.0,0.5,0.0,0.0,0.0,0.6,0.9,231.2,227.6,132.9,93.0,5.1,0.1,11.1,24.4,16.0,,,,,,4.6,39.1,26.7,3.0,59.7,38.1
4,AT,Austria,2015,8584926.0,4384529.0,83026.0,43159.0,17.0,14.0,0.0,14.0,32.0,46.0,10529.0,10297.0,,6172.0,4150.0,196.0,11.0,452.0,,3648.0,433.0,,1045.0,688.0,179.17,1603.33,1026.91,125.93,2508.4,1516.3,-26.1,inf,,inf,28.0,21.1,4.7,4.0,,6.7,2.5,-11.3,83.3,-6.8,,,,,-5.4,-1.3,-7.5,2.5,1.2,-4.1,-1.5,-1.1,4200397.0,0.4,0.3,0.0,0.3,0.7,1.0,240.1,234.8,140.8,94.7,4.5,0.3,10.3,23.8,15.7,,,86.8,10.3,,4.3,38.2,24.4,3.0,59.7,36.1


### Calculate, from the EIGE report, how many of the prosecuted perpetrators were sentenced and how many eventually ended up in jail

A lack of comparability between jurisdictions’
data on male perpetrators sentenced
(indicator 12) is due to the use of the number of
convictions (42), the number of cases (43) and the
number of inquiries resulting in an indictment (44)
as a unit of analysis, instead of male perpetrators.

<i> EIGE page 33</i>

Comparable countries for indicator 3:
- Belgium, Czechia, Slovenia, Finland, Sweden

For indicators 11,12,13 <i>(EIGE page 23, comments 24 & 25)</i>
- Greece, Croatia, Italy, Austria, Portugal, Slovenia and the United Kingdom the data collected does not adhere to EIGE’s indicator because they relate to domestic violence

- These Member States collect data on domestic violence offences within the intimate relationship: Belgium; Czechia, which excludes economic violence from the definition; Estonia, where the definition is almost limited to physical violence – damage to the health of another person and physical abuse that causes pain committed in a close relationship or relationship of subordination; and France, where the term is defined as violence conjugales.

In [33]:
df['pct_of_jailed_from_prosecuted'] = round((df['men_held_in_prison']/df['men_prosecuted_for_int_violence'])*100, 1)


In [34]:
df['pct_of_sentenced_from_prosecuted'] = round((df['men_sentenced']/df['men_prosecuted_for_int_violence'])*100, 1)


# Adding filters for EIGE comparable countries

### Femicides comparable countries 
- True = comparable

In [35]:
# Countries with comparable data for femicides - EIGE page 29, footnote 30
comparable_femicides_countries = ['Czechia', 'Germany', 'Spain', 'France', 'Italy', 'Latvia', 'Lithuania', 'Malta', 'Netherlands', 'Slovenia', 'Slovakia', 'Finland', 'Sweden']
df['comparable_femicides_countries'] = df['Country'].isin(comparable_femicides_countries)

### Reported offences comparable countries

In [36]:
# Countries with comparable data for reported offences EIGE page 32
comparable_reported_offences = ['Czechia', 'Spain', 'Croatia', 'Latvia', 'Slovenia', 'Sweden']
df['comparable_countries_reported_offences'] = df['Country'].isin(comparable_reported_offences)

# Export master dataframe

In [37]:
df.to_csv('master_df.csv', index=False)

# Analysis

We will save two auto-generated datasets. The first will have the overview and the second will be country based. The second one has the following keys:
- 'Country'
- 'Year'
- 'Text'

In [38]:
# List that has the generic autogenerated text
generic_results = []

# List that saves the country based results
analysis_results = []
text_results = {}

## Functions for analysis

In [39]:
def extract_info(df, location, value):
    '''
    Pick three fields from one row of a dataframe, addressed by position.

    df -> dataframe (usually already sorted, e.g. by number of femicides)
    location -> positional row index (0 is the first row of df)
    value -> sequence holding the three column names to read
    Returns a tuple with the three values, in the order given in value.
    e.g. extract_info(df, 5, ['Country','Year','femicides']) reads those
    three columns from the row at position 5.
    '''
    row = df.iloc[location]
    return row[value[0]], row[value[1]], row[value[2]]

# example - Country 
extract_info(df.sort_values(by='femicides_pct_change', ascending=False), 0,['Country','Year','femicides_pct_change'])

('Malta', 2018, inf)

In [40]:
def check_data(value):
    '''
    It's used in the f strings.
    Returns the text '(NO DATA)' when value is missing (NaN, None or NaT)
    and returns value unchanged otherwise.
    Strings and other non-numeric scalars are passed through instead of
    raising, so the helper can be used on any cell of the dataframe.
    '''
    # pd.isna handles None and non-numeric scalars, which np.isnan rejects.
    return '(NO DATA)' if pd.isna(value) else value
# example
check_data(np.nan)

'(NO DATA)'

In [41]:
# function that will add the text increase or decrease based on the given number
def inc_dec(data1):
    '''
    This function will return the word 'increase' or 'decrease' depending
    of the value that you'll feed it. If it's a negative value, it will return decrease.

    data1 -> a number (Python or numpy, integer or float)
    Returns 'increase' for positive numbers and 'decrease' for negative ones.
    Returns an empty string when no direction can be stated: the input is not
    a number, is NaN, or is exactly zero.
    '''
    # bool is a subclass of int but is not a measurement, so it is excluded
    is_number = isinstance(data1, (int, float, np.integer, np.floating)) and not isinstance(data1, bool)
    # data1 != data1 is only true for NaN
    if not is_number or data1 != data1 or data1 == 0:
        return ""
    return "increase" if data1 > 0 else 'decrease'
# example
inc_dec(-181.0)

'decrease'

In [42]:
# function that calculates the pct_change of a given value for a country between a given year and a base year (e.g. 2019)
def compare_to_date(df,country,value,year, base_year):
    '''
    This function will return the pct_change between two values (year and base year).
    It will also return the absolute numbers
    df -> your dataframe (needs the columns 'Country' and 'Year')
    country -> the country you want to examine (that is included in your dataframe)
    value -> the column name of your dataframe that you want to extract your value from
    year -> the year you want your value to reflect
    base_year -> the year the value is compared to

    Returns the tuple (change, after, before) where change is the percentage
    change rounded to one decimal, after is the value in year and before is the
    value in base_year. change is NaN when one of the two values is NaN.
    Returns the string "no data" when the comparison cannot be made: there is
    not exactly one row for the country in one of the two years, the column
    does not exist, the values are not numeric, or the base value is zero.

    e.g. compare_to_date(df, 'Greece','femicides',2021, 2019) will return the pct_change of
    femicides in 2021 compared to 2019 (before the pandemic)
    '''
    try:
        # .item() raises ValueError unless the selection holds exactly one value
        before = df[(df['Year']==base_year) & (df['Country']==country)][value].item()
        after = df[(df['Year']==year) & (df['Country']==country)][value].item()
        change = round(((after-before)/before)*100,1)
    except (KeyError, ValueError, TypeError, ZeroDivisionError):
        return "no data"
    return change, after, before
# example
compare_to_date(df, 'Greece','physical_violence',2021, 2019)

(300.3, 6873.0, 1717.0)

In [43]:
def compare_to_date_text(dataframe, list_of_countries, value, years, base_year):
    '''
    Build one sentence per country and year describing how a value changed
    compared to a base year. It relies on the functions
    - compare_to_date()
    - inc_dec()
    dataframe -> your dataframe
    list_of_countries -> list of the countries that exist in your dataframe
    value -> the column name that you want to compare to base year
    years -> the years to compare against the base year
    base_year -> the reference year
    Returns a list of dictionaries with the keys 'Country', 'Year' and
    value + '_text'. Combinations for which compare_to_date() yields no
    usable percentage (no data, or NaN) are left out.
    '''
    results_list = []
    for country in list_of_countries:
        for year in years:
            outcome = compare_to_date(dataframe, country, value, year, base_year)
            change = outcome[0]
            # skip when there is no float percentage to report
            if not isinstance(change, float) or np.isnan(change):
                continue
            after, before = outcome[1], outcome[2]
            sentence = f'{country} in {year} had a {change}% ({after}) {inc_dec(change)} in {value} compared to {base_year} ({before}).'
            results_list.append({
                'Country': country,
                'Year': year,
                value+'_text': sentence,
            })
    return results_list
# example
compare_to_date_text(df, ['Greece'],'psychological_violence', [2020, 2021], 2019)

[{'Country': 'Greece',
  'Year': 2020,
  'psychological_violence_text': 'Greece in 2020 had a 104.6% (2906.0) increase in psychological_violence compared to 2019 (1420.0).'},
 {'Country': 'Greece',
  'Year': 2021,
  'psychological_violence_text': 'Greece in 2021 had a 276.8% (5350.0) increase in psychological_violence compared to 2019 (1420.0).'}]

----------------------------------------

## Femicides OTHER SOURCES

In [44]:
# difference in official data with data from other sources
url_other_sources = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQbjKuY8nj2OVl-fvtxLaEWs2ubMPzZaaFIA4_8k6Nh5Wi1nYmKh7QKhlPWpnKLGw1EGu2LhflRKwqM/pub?gid=1690417396&single=true&output=csv'
col_fem = "Number of femicides deriving from other sources (not police/government statistics, but monitor groups, ngo's, etc) - IF available and different than official records"

In [45]:
# Femicide counts reported by non-official sources: keep only country, year and the count column.
# NOTE(review): the variable name `os` is also the name of a standard-library module;
# it would hide that module if it were ever imported in this notebook.
os = read(url_other_sources)[['Country', 'Year', col_fem]]
os.rename(columns={col_fem:'unofficial_femicides'},inplace=True)
# Keep the first run of digits found in each cell as a float; cells without digits become NaN.
# read() has already removed ',' ':' and '?' from the text.
# NOTE(review): .str needs a text column - presumably the cells contain free text; confirm.
os['clean_unofficial_femicides'] = os['unofficial_femicides'].str.extract(r'(\d+)').astype(float)

In [46]:
# COMPARE OFFICIAL SOURCES WITH UNOFFICIAL
fem = pd.merge(df[['Country','Year','femicides']],os, how='left').dropna(subset=['femicides', 'clean_unofficial_femicides'])[['Country','Year','femicides','clean_unofficial_femicides']].reset_index(drop=True)

In [47]:
# Relationship between official and unofficial recordings of femicides
# Despite its name, 'pct_difference' holds a ratio (unofficial / official, one decimal),
# e.g. 1.5 means one and a half times as many unofficial as official femicides.
fem['pct_difference'] = round((fem['clean_unofficial_femicides']/fem['femicides']),1)

In [48]:
def compare_official_unofficial_femicides_text(df):
    '''
    text generator
    builds one sentence per row of df from its Country, Year and
    pct_difference values (read through extract_info) and returns a list of
    dicts with the keys 'Country', 'Year' and
    'text_official_unofficial_femicides'.
    '''
    def describe(position):
        # values come back in the order of the requested columns
        info = extract_info(df, position, ['Country','Year','pct_difference'])
        return {
            'Country': info[0],
            'Year': info[1],
            'text_official_unofficial_femicides': f"In {info[0]} in {info[1]} there were {info[2]} times unofficial femicides to official data",
        }

    return [describe(position) for position in range(len(df))]

analysis_results.append(compare_official_unofficial_femicides_text(fem))


---------------------------------

# Questions

### 1 How many countries are we examining. Which years? Which Countries?

In [49]:
# Countries we are examining.
# Sorted so the list (and every text built from it) comes out in the same order
# on each run; iterating a set of strings gives no such guarantee.
countries = sorted(set(df.Country.tolist()))
num_countries = len(countries)

In [50]:
# Years we are examining: distinct values of the Year column, ascending
years = sorted(set(df.Year.tolist()))

In [51]:
# Overview sentence: number of countries, first/last year and the country list.
# (Fixes the "examinging" typo in the generated text.)
generic_results.append(
    f"We are examining {num_countries} countries "
    f"for the years from {years[0]} to year {years[-1]}. "
    f"The countries we are examining are {', '.join(countries)}. "
    "We will focus from 2015 to 2021"
)



### 2. What is the total number of femicides and intentional homicides from family and partners?

In [52]:
# Rows that actually carry a femicide count; selected once and reused below.
_femicide_rows = df.loc[df.femicides.notna()]
# First and last year with data (min/max instead of sorting the column twice).
year_start = _femicide_rows['Year'].min()
year_end = _femicide_rows['Year'].max()
countries = _femicide_rows['Country'].unique()
# Countries that never report a femicide count; sorted so the generated text is
# identical on every run.
countries_no_fem = sorted(set(df.Country.tolist()) - set(countries))

generic_results.append(f"According to our data we have {df.femicides.sum()} femicides. This number corresponds to \
data from {year_start} to {year_end} for the following countries {', '.join(countries)}. We count only the number \
of femicides according to the EIGE indicator (number 9). We don't have corresponding data for \
the following countries: {', '.join(countries_no_fem)}.")

In [53]:
# Totals over all rows of the frame
all_int = df['Int_homicide_by_family_and_partner'].sum()
partner_int = df['intentional_partner'].sum()
family_int = df['intentional_family'].sum()
# Rows that carry a family/partner homicide figure; selected once and reused.
_int_homicide_rows = df.loc[df.Int_homicide_by_family_and_partner.notna()]
# First and last year with data (min/max instead of sorting the column twice).
year_start = _int_homicide_rows['Year'].min()
year_end = _int_homicide_rows['Year'].max()
countries = _int_homicide_rows['Country'].unique()
# Countries without such a figure; sorted so the generated text is identical on
# every run. The variable keeps its original name `countries_no_fem`.
countries_no_fem = sorted(set(df.Country.tolist()) - set(countries))

# NOTE(review): the sentence "We count only the number of femicides according to
# the EIGE indicator (number 9)" looks copied from the femicide summary, while
# this text is about Eurostat data - confirm whether it belongs here.
generic_results.append(f"According to the Eurostat data we have {all_int} intentional homicides of women from \
intimate partners ({partner_int}) and family members and relatives ({family_int}). This number corresponds to \
data from {year_start} to {year_end} for the following countries {', '.join(countries)}. We count only the number \
of femicides according to the EIGE indicator (number 9). We don't have corresponding data for \
the following countries: {', '.join(countries_no_fem)}.")


### 3 Which countries had the highest increase in femicides? Which the lowest? And which years?

In [54]:
# Countries with the biggest increase in femicides, ALL YEARS.
# Infinite changes (e.g. Malta going from 0 to 2) are hardcoded to 100.
(
    df[df.femicides_pct_change.notna()]
    .replace(np.inf, 100)
    .sort_values(by='femicides_pct_change', ascending=False)
    [['Country', 'Year', 'femicides_pct_change']]
)


Unnamed: 0,Country,Year,femicides_pct_change
142,Greece,2021,187.5
331,Sweden,2018,120.0
134,Greece,2013,116.7
88,Estonia,2015,100.0
309,Slovenia,2020,100.0
223,Malta,2018,100.0
221,Malta,2016,100.0
139,Greece,2018,85.7
101,Finland,2016,83.3
294,Slovakia,2017,83.3


### 4 Which countries had an increase in femicides in 2020 (first year of covid) compared to 2019 (before covid)?

In [55]:
# Femicide changes recorded for 2020 (first COVID year), largest first, top 10.
# Infinite changes are shown as 100.
(
    df[df.femicides_pct_change.notna() & (df['Year'] == 2020)]
    .replace(np.inf, 100)
    .sort_values(by='femicides_pct_change', ascending=False)
    [['Country', 'Year', 'femicides_pct_change']]
    .head(10)
)


Unnamed: 0,Country,Year,femicides_pct_change
309,Slovenia,2020,100.0
129,Germany,2020,18.8
141,Greece,2020,0.0
153,Hungary,2020,0.0
285,Serbia,2020,0.0
177,Italy,2020,-1.5
333,Sweden,2020,-18.8
57,Cyprus,2020,-44.4


### 5 Which countries had an increase in femicides in 2021 (second year of covid) compared to 2020 (first year of covid)?

In [56]:
# Femicide changes recorded for 2021, largest first (infinite changes shown as 100).
(
    df[df.femicides_pct_change.notna() & (df['Year'] == 2021)]
    .replace(np.inf, 100)
    .sort_values(by='femicides_pct_change', ascending=False)
    [['Country', 'Year', 'femicides_pct_change']]
)


Unnamed: 0,Country,Year,femicides_pct_change
142,Greece,2021,187.5
334,Sweden,2021,15.4
178,Italy,2021,4.5
58,Cyprus,2021,0.0
154,Hungary,2021,-5.9
286,Serbia,2021,-31.6
310,Slovenia,2021,-60.0


##### AUTO TEXT FEMICIDES

In [57]:
femicides_auto_text = compare_to_date_text(df, df.Country.unique(),'femicides',[2020, 2021], 2019)
analysis_results.append(femicides_auto_text)
femicides_auto_text

[{'Country': 'Cyprus',
  'Year': 2020,
  'femicides_text': 'Cyprus in 2020 had a -44.4% (5.0) decrease in femicides compared to 2019 (9.0).'},
 {'Country': 'Cyprus',
  'Year': 2021,
  'femicides_text': 'Cyprus in 2021 had a -44.4% (5.0) decrease in femicides compared to 2019 (9.0).'},
 {'Country': 'Germany',
  'Year': 2020,
  'femicides_text': 'Germany in 2020 had a 18.8% (139.0) increase in femicides compared to 2019 (117.0).'},
 {'Country': 'Greece',
  'Year': 2020,
  'femicides_text': 'Greece in 2020 had a 0.0% (8.0) decrease in femicides compared to 2019 (8.0).'},
 {'Country': 'Greece',
  'Year': 2021,
  'femicides_text': 'Greece in 2021 had a 187.5% (23.0) increase in femicides compared to 2019 (8.0).'},
 {'Country': 'Hungary',
  'Year': 2020,
  'femicides_text': 'Hungary in 2020 had a 0.0% (17.0) decrease in femicides compared to 2019 (17.0).'},
 {'Country': 'Hungary',
  'Year': 2021,
  'femicides_text': 'Hungary in 2021 had a -5.9% (16.0) decrease in femicides compared to 2019 (

### 6 Which countries had an increase in intentional homicides of women by partners and family members in 2020 (first year of covid) compared to 2019 (before covid)?

In [58]:
# Changes in homicides by family and partner recorded for 2020, largest first
# (infinite changes shown as 100).
(
    df[df.Int_homicide_by_family_and_partner_pct_change.notna() & (df['Year'] == 2020)]
    .replace(np.inf, 100)
    .sort_values(by='Int_homicide_by_family_and_partner_pct_change', ascending=False)
    [['Country', 'Year', 'Int_homicide_by_family_and_partner_pct_change']]
)


Unnamed: 0,Country,Year,Int_homicide_by_family_and_partner_pct_change
309,Slovenia,2020,100.0
141,Greece,2020,100.0
45,Croatia,2020,55.6
9,Austria,2020,28.6
153,Hungary,2020,26.1
237,Netherlands,2020,25.8
129,Germany,2020,20.3
201,Lithuania,2020,18.2
321,Spain,2020,13.2
69,Czechia,2020,10.5


### 7 Which countries had an increase in intentional homicides of women by partners and family members in 2021 (second year of covid) compared to 2020 (first year of covid)?

In [59]:
# Changes in homicides by family and partner recorded for 2021, largest first,
# top 10 (infinite changes shown as 100).
(
    df[df.Int_homicide_by_family_and_partner_pct_change.notna() & (df['Year'] == 2021)]
    .replace(np.inf, 100)
    .sort_values(by='Int_homicide_by_family_and_partner_pct_change', ascending=False)
    [['Country', 'Year', 'Int_homicide_by_family_and_partner_pct_change']]
    .head(10)
)


Unnamed: 0,Country,Year,Int_homicide_by_family_and_partner_pct_change
142,Greece,2021,155.6
310,Slovenia,2021,0.0
130,Germany,2021,-100.0


##### AUTO TEXT INTENTIONAL HOMICIDES BY PARTNERS & FAMILY MEMBERS

In [60]:
int_fam_partners_auto_text = compare_to_date_text(df, df.Country.unique(),'Int_homicide_by_family_and_partner',[2020, 2021], 2019)
analysis_results.append(int_fam_partners_auto_text)
int_fam_partners_auto_text

[{'Country': 'Austria',
  'Year': 2020,
  'Int_homicide_by_family_and_partner_text': 'Austria in 2020 had a 28.6% (27.0) increase in Int_homicide_by_family_and_partner compared to 2019 (21.0).'},
 {'Country': 'Croatia',
  'Year': 2020,
  'Int_homicide_by_family_and_partner_text': 'Croatia in 2020 had a 55.6% (14.0) increase in Int_homicide_by_family_and_partner compared to 2019 (9.0).'},
 {'Country': 'Czechia',
  'Year': 2020,
  'Int_homicide_by_family_and_partner_text': 'Czechia in 2020 had a 10.5% (21.0) increase in Int_homicide_by_family_and_partner compared to 2019 (19.0).'},
 {'Country': 'Finland',
  'Year': 2020,
  'Int_homicide_by_family_and_partner_text': 'Finland in 2020 had a -100.0% (0.0) decrease in Int_homicide_by_family_and_partner compared to 2019 (19.0).'},
 {'Country': 'France',
  'Year': 2020,
  'Int_homicide_by_family_and_partner_text': 'France in 2020 had a 1.4% (148.0) increase in Int_homicide_by_family_and_partner compared to 2019 (146.0).'},
 {'Country': 'Germany

### 8 Highest increase in intimate victim violence > 2015 - 2019

In [61]:
# Ten largest changes in reported intimate-violence offences, years after 2014
(
    df[df['reported_offences_int_violence_pct_change'].notna() & (df.Year > 2014)]
    .sort_values(by='reported_offences_int_violence_pct_change', ascending=False)
    [['Country', 'Year', 'reported_offences_int_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,reported_offences_int_violence_pct_change
142,Greece,2021,54.8
139,Greece,2018,50.7
293,Slovakia,2016,43.0
42,Croatia,2017,26.2
331,Sweden,2018,26.0
31,Bulgaria,2018,22.2
187,Latvia,2018,14.8
292,Slovakia,2015,13.4
28,Bulgaria,2015,11.9
309,Slovenia,2020,10.6


### 9 Highest increase in intimate victim violence > 2020 & 2021

In [62]:
# Ten largest changes in reported intimate-violence offences, years after 2019
(
    df[df['reported_offences_int_violence_pct_change'].notna() & (df.Year > 2019)]
    .sort_values(by='reported_offences_int_violence_pct_change', ascending=False)
    [['Country', 'Year', 'reported_offences_int_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,reported_offences_int_violence_pct_change
142,Greece,2021,54.8
309,Slovenia,2020,10.6
141,Greece,2020,8.6
310,Slovenia,2021,-12.3


##### AUTO TEXT INTIMATE VIOLENCE

In [63]:
int_violence_auto_text = compare_to_date_text(df, df.Country.unique(),'reported_offences_int_violence',[2020, 2021], 2019)
analysis_results.append(int_violence_auto_text)
int_violence_auto_text


[{'Country': 'Greece',
  'Year': 2020,
  'reported_offences_int_violence_text': 'Greece in 2020 had a 8.6% (5669.0) increase in reported_offences_int_violence compared to 2019 (5220.0).'},
 {'Country': 'Greece',
  'Year': 2021,
  'reported_offences_int_violence_text': 'Greece in 2021 had a 68.1% (8776.0) increase in reported_offences_int_violence compared to 2019 (5220.0).'},
 {'Country': 'Slovenia',
  'Year': 2020,
  'reported_offences_int_violence_text': 'Slovenia in 2020 had a 10.6% (1145.0) increase in reported_offences_int_violence compared to 2019 (1035.0).'},
 {'Country': 'Slovenia',
  'Year': 2021,
  'reported_offences_int_violence_text': 'Slovenia in 2021 had a -3.0% (1004.0) decrease in reported_offences_int_violence compared to 2019 (1035.0).'}]

### 10 Increase in physical - psychological - economical - sexual violence (EIGE)

#### 10a Physical violence increase 2015>2021 (top 10 countries)

In [64]:
# Countries with the biggest increase in PHYSICAL VIOLENCE, years after 2014 (top 10)
(
    df[df['physical_violence_pct_change'].notna() & (df['Year'] > 2014)]
    .sort_values('physical_violence_pct_change', ascending=False)
    [['Country', 'Year', 'physical_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,physical_violence_pct_change
141,Greece,2020,110.2
142,Greece,2021,90.4
280,Serbia,2015,53.5
282,Serbia,2017,35.4
331,Sweden,2018,25.9
187,Latvia,2018,24.4
281,Serbia,2016,19.4
30,Bulgaria,2017,12.9
283,Serbia,2018,10.2
42,Croatia,2017,7.5


#### 10a Physical violence increase before - after the start of the pandemic (top 10 countries)


In [65]:
# Countries with the biggest increase in PHYSICAL VIOLENCE, years after 2019 --- COVID (top 10)
(
    df[df['physical_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('physical_violence_pct_change', ascending=False)
    [['Country', 'Year', 'physical_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,physical_violence_pct_change
141,Greece,2020,110.2
142,Greece,2021,90.4
129,Germany,2020,5.1
286,Serbia,2021,-2.4
285,Serbia,2020,-2.8


##### AUTO TEXT PHYSICAL VIOLENCE

In [66]:
physical_violence_auto_text = compare_to_date_text(df, df.Country.unique(),'physical_violence',[2020, 2021], 2019)
analysis_results.append(physical_violence_auto_text)
physical_violence_auto_text

[{'Country': 'Germany',
  'Year': 2020,
  'physical_violence_text': 'Germany in 2020 had a 5.1% (78294.0) increase in physical_violence compared to 2019 (74520.0).'},
 {'Country': 'Greece',
  'Year': 2020,
  'physical_violence_text': 'Greece in 2020 had a 110.2% (3609.0) increase in physical_violence compared to 2019 (1717.0).'},
 {'Country': 'Greece',
  'Year': 2021,
  'physical_violence_text': 'Greece in 2021 had a 300.3% (6873.0) increase in physical_violence compared to 2019 (1717.0).'},
 {'Country': 'Serbia',
  'Year': 2020,
  'physical_violence_text': 'Serbia in 2020 had a -2.8% (9725.0) decrease in physical_violence compared to 2019 (10009.0).'},
 {'Country': 'Serbia',
  'Year': 2021,
  'physical_violence_text': 'Serbia in 2021 had a -5.2% (9489.0) decrease in physical_violence compared to 2019 (10009.0).'}]

#### 10b Psychological violence increase 2015>2021 (top 10 countries)

In [67]:
# Ten largest changes in psychological violence, years after 2014
(
    df[df['psychological_violence_pct_change'].notna() & (df['Year'] > 2014)]
    .sort_values('psychological_violence_pct_change', ascending=False)
    [['Country', 'Year', 'psychological_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,psychological_violence_pct_change
141,Greece,2020,104.6
184,Latvia,2015,100.0
142,Greece,2021,84.1
282,Serbia,2017,60.0
293,Slovakia,2016,56.1
281,Serbia,2016,42.2
187,Latvia,2018,30.8
280,Serbia,2015,29.0
42,Croatia,2017,27.9
283,Serbia,2018,26.0


#### 10b Psychological violence increase before - after the start of the pandemic (top 10 countries)

In [68]:
# Ten largest changes in psychological violence, years after 2019
(
    df[df['psychological_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('psychological_violence_pct_change', ascending=False)
    [['Country', 'Year', 'psychological_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,psychological_violence_pct_change
141,Greece,2020,104.6
142,Greece,2021,84.1
286,Serbia,2021,6.1
285,Serbia,2020,3.4
129,Germany,2020,1.5


##### AUTO TEXT PSYCHOLOGICAL VIOLENCE

In [69]:
psychological_violence_auto_text = compare_to_date_text(df, df.Country.unique(),'psychological_violence',[2020, 2021], 2019)
analysis_results.append(psychological_violence_auto_text)
psychological_violence_auto_text

[{'Country': 'Germany',
  'Year': 2020,
  'psychological_violence_text': 'Germany in 2020 had a 1.5% (26905.0) increase in psychological_violence compared to 2019 (26515.0).'},
 {'Country': 'Greece',
  'Year': 2020,
  'psychological_violence_text': 'Greece in 2020 had a 104.6% (2906.0) increase in psychological_violence compared to 2019 (1420.0).'},
 {'Country': 'Greece',
  'Year': 2021,
  'psychological_violence_text': 'Greece in 2021 had a 276.8% (5350.0) increase in psychological_violence compared to 2019 (1420.0).'},
 {'Country': 'Serbia',
  'Year': 2020,
  'psychological_violence_text': 'Serbia in 2020 had a 3.4% (9983.0) increase in psychological_violence compared to 2019 (9653.0).'},
 {'Country': 'Serbia',
  'Year': 2021,
  'psychological_violence_text': 'Serbia in 2021 had a 9.7% (10588.0) increase in psychological_violence compared to 2019 (9653.0).'}]

#### 10c Economic violence increase 2015>2021 (top 10 countries)

In [70]:
# Ten largest changes in economic violence, years after 2014
(
    df[df['economic_violence_pct_change'].notna() & (df['Year'] > 2014)]
    .sort_values('economic_violence_pct_change', ascending=False)
    [['Country', 'Year', 'economic_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,economic_violence_pct_change
319,Spain,2018,296.7
292,Slovakia,2015,109.4
66,Czechia,2017,89.5
4,Austria,2015,83.3
102,Finland,2017,70.0
101,Finland,2016,66.7
65,Czechia,2016,26.7
67,Czechia,2018,26.4
6,Austria,2017,16.7
129,Germany,2020,12.7


#### 10c Economic violence increase before - after the start of the pandemic (top 10 countries)

In [71]:
# Ten largest changes in economic violence, years after 2019
(
    df[df['economic_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('economic_violence_pct_change', ascending=False)
    [['Country', 'Year', 'economic_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,economic_violence_pct_change
129,Germany,2020,12.7
285,Serbia,2020,6.4
286,Serbia,2021,-1.0


##### AUTO TEXT ECONOMIC VIOLENCE

In [72]:
economic_violence_auto_text = compare_to_date_text(df, df.Country.unique(),'economic_violence',[2020, 2021], 2019)
analysis_results.append(economic_violence_auto_text)
economic_violence_auto_text

[{'Country': 'Germany',
  'Year': 2020,
  'economic_violence_text': 'Germany in 2020 had a 12.7% (71.0) increase in economic_violence compared to 2019 (63.0).'},
 {'Country': 'Serbia',
  'Year': 2020,
  'economic_violence_text': 'Serbia in 2020 had a 6.4% (415.0) increase in economic_violence compared to 2019 (390.0).'},
 {'Country': 'Serbia',
  'Year': 2021,
  'economic_violence_text': 'Serbia in 2021 had a 5.4% (411.0) increase in economic_violence compared to 2019 (390.0).'}]

#### 10d Sexual violence increase 2015>2021 (top 10 countries)

In [73]:
# Ten largest changes in sexual violence, years after 2014
(
    df[df['sexual_violence_pct_change'].notna() & (df['Year'] > 2014)]
    .sort_values('sexual_violence_pct_change', ascending=False)
    [['Country', 'Year', 'sexual_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,sexual_violence_pct_change
186,Latvia,2017,200.0
52,Cyprus,2015,163.2
294,Slovakia,2017,133.3
141,Greece,2020,115.6
142,Greece,2021,104.3
197,Lithuania,2016,100.0
199,Lithuania,2018,100.0
293,Slovakia,2016,80.0
286,Serbia,2021,76.0
284,Serbia,2019,70.2


#### 10d Sexual violence increase before - after the start of the pandemic (top 10 countries)

In [74]:
# Ten largest changes in sexual violence, years after 2019
(
    df[df['sexual_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('sexual_violence_pct_change', ascending=False)
    [['Country', 'Year', 'sexual_violence_pct_change']]
    .head(10)
)

Unnamed: 0,Country,Year,sexual_violence_pct_change
141,Greece,2020,115.6
142,Greece,2021,104.3
286,Serbia,2021,76.0
309,Slovenia,2020,64.3
153,Hungary,2020,20.8
310,Slovenia,2021,17.4
129,Germany,2020,8.0
154,Hungary,2021,-6.3
285,Serbia,2020,-52.6


##### AUTO TEXT SEXUAL VIOLENCE

In [75]:
sexual_violence_auto_text = compare_to_date_text(df, df.Country.unique(),'sexual_violence',[2020, 2021], 2019)
analysis_results.append(sexual_violence_auto_text)
sexual_violence_auto_text

[{'Country': 'Germany',
  'Year': 2020,
  'sexual_violence_text': 'Germany in 2020 had a 8.0% (2677.0) increase in sexual_violence compared to 2019 (2478.0).'},
 {'Country': 'Greece',
  'Year': 2020,
  'sexual_violence_text': 'Greece in 2020 had a 115.6% (69.0) increase in sexual_violence compared to 2019 (32.0).'},
 {'Country': 'Greece',
  'Year': 2021,
  'sexual_violence_text': 'Greece in 2021 had a 340.6% (141.0) increase in sexual_violence compared to 2019 (32.0).'},
 {'Country': 'Hungary',
  'Year': 2020,
  'sexual_violence_text': 'Hungary in 2020 had a 20.8% (1157.0) increase in sexual_violence compared to 2019 (958.0).'},
 {'Country': 'Hungary',
  'Year': 2021,
  'sexual_violence_text': 'Hungary in 2021 had a 13.2% (1084.0) increase in sexual_violence compared to 2019 (958.0).'},
 {'Country': 'Serbia',
  'Year': 2020,
  'sexual_violence_text': 'Serbia in 2020 had a -52.6% (100.0) decrease in sexual_violence compared to 2019 (211.0).'},
 {'Country': 'Serbia',
  'Year': 2021,
  's

### 11 Female sexual assault & rape victims (EUROSTAT)

#### Rape Victims > 2015

In [105]:
# Changes in female rape victims for years after 2014, largest first.
# Infinite changes are left out (they arise when a country goes from 0 rapes
# to some rapes the following year).
(
    df[
        df['female_rape_victims_pct_change'].notna()
        & (df['Year'] > 2014)
        & (df['female_rape_victims_pct_change'] != np.inf)
    ]
    .sort_values('female_rape_victims_pct_change', ascending=False)
    [['Country', 'Year', 'female_rape_victims_pct_change']]
)

Unnamed: 0,Country,Year,female_rape_victims_pct_change
282,Serbia,2017,120.4
220,Malta,2015,109.1
209,Luxembourg,2016,77.8
185,Latvia,2016,68.8
224,Malta,2019,61.1
44,Croatia,2019,58.5
149,Hungary,2016,56.6
197,Lithuania,2016,48.9
306,Slovenia,2017,42.9
153,Hungary,2020,41.2


#### Rape victims >2019

In [106]:
# Changes in female rape victims for years after 2019, largest first
# (infinite changes left out).
(
    df[
        df['female_rape_victims_pct_change'].notna()
        & (df['Year'] > 2019)
        & (df['female_rape_victims_pct_change'] != np.inf)
    ]
    .sort_values('female_rape_victims_pct_change', ascending=False)
    [['Country', 'Year', 'female_rape_victims_pct_change']]
)

Unnamed: 0,Country,Year,female_rape_victims_pct_change
153,Hungary,2020,41.2
141,Greece,2020,36.5
310,Slovenia,2021,34.1
70,Czechia,2021,31.4
322,Spain,2021,28.2
273,Romania,2020,22.5
309,Slovenia,2020,13.9
117,France,2020,11.4
333,Sweden,2020,8.2
129,Germany,2020,6.3


### IN HOW MANY COUNTRIES HAVE RAPE VICTIMS INCREASED VS DECREASED, BY YEAR

In [161]:
# Per (Year, Country): True when the change in female rape victims is above zero
t = (
    df[df.female_rape_victims_pct_change.notna()]
    .groupby(['Year', 'Country'])['female_rape_victims_pct_change']
    .sum()
    .gt(0)
    .reset_index()
)

# One row per year. True = number of countries where rape victims have increased
(
    t.groupby('Year')
    .female_rape_victims_pct_change
    .value_counts()
    .to_frame()
    .unstack()
    .droplevel(0, axis=1)
    .reset_index()
)

female_rape_victims_pct_change,Year,False,True
0,2012,7,12
1,2013,12,8
2,2014,11,9
3,2015,11,10
4,2016,4,18
5,2017,6,17
6,2018,9,14
7,2019,8,14
8,2020,13,9
9,2021,3,3


##### AUTO TEXT RAPE VICTIMS

In [107]:
female_rape_victims_auto_text = compare_to_date_text(df, df.Country.unique(),'female_rape_victims',[2020, 2021], 2019)
analysis_results.append(female_rape_victims_auto_text)
female_rape_victims_auto_text

[{'Country': 'Austria',
  'Year': 2020,
  'female_rape_victims_text': 'Austria in 2020 had a -1.1% (1579.0) decrease in female_rape_victims compared to 2019 (1596.0).'},
 {'Country': 'Bulgaria',
  'Year': 2020,
  'female_rape_victims_text': 'Bulgaria in 2020 had a -15.5% (93.0) decrease in female_rape_victims compared to 2019 (110.0).'},
 {'Country': 'Croatia',
  'Year': 2020,
  'female_rape_victims_text': 'Croatia in 2020 had a -31.2% (289.0) decrease in female_rape_victims compared to 2019 (420.0).'},
 {'Country': 'Czechia',
  'Year': 2020,
  'female_rape_victims_text': 'Czechia in 2020 had a -14.4% (328.0) decrease in female_rape_victims compared to 2019 (383.0).'},
 {'Country': 'Czechia',
  'Year': 2021,
  'female_rape_victims_text': 'Czechia in 2021 had a 12.5% (431.0) increase in female_rape_victims compared to 2019 (383.0).'},
 {'Country': 'Denmark',
  'Year': 2020,
  'female_rape_victims_text': 'Denmark in 2020 had a -4.6% (1728.0) decrease in female_rape_victims compared to 20

# Save dataframe with auto generated text

In [93]:
# One DataFrame per list of generated texts. Empty lists are skipped: they
# would become column-less frames that cannot be merged with the others.
analysis_text_list = [pd.DataFrame(item) for item in analysis_results if len(item) > 0]
# Outer-merge everything on the shared columns so each text lands in its own column.
d = reduce(lambda df1,df2: pd.merge(df1,df2,how='outer'), analysis_text_list)
# sort_values returns a new frame; keep it, otherwise the CSV is written unsorted.
d = d.sort_values(by=['Country','Year'])
d.to_csv('auto_generated_text_results.csv', index=False)

In [None]:
# Intentional female homicides with their change, largest change first
intent = (
    df[['Country', 'Year', 'Intentional female homicides_pct_change', 'Intentional female homicides']]
    .sort_values(by='Intentional female homicides_pct_change', ascending=False)
)

# The same ranking restricted to 2021
(
    intent[intent.Year == 2021]
    .sort_values(by='Intentional female homicides_pct_change', ascending=False)
)

# Intentional Murders of females - victim perp relationships

In [None]:

vo.head()

In [None]:
# Restrict the victim-offender data to rows with Victim_sex 'Females' and Unit
# 'Number' (per the original note this leaves out the per-100k rows), then
# renumber the rows.
vo = vo.loc[(vo['Victim_sex'] == 'Females') & (vo['Unit'] == 'Number')].reset_index(drop=True)
vo

In [None]:
# Left-join the victim-offender rows onto the main dataset (joined on the
# columns the two selections share)
df_vo = pd.merge(
    df[['Country', 'Year', 'Intentional female homicides', 'female_population', 'Female deaths']],
    vo[['Country', 'Year', 'Relationship_type', 'Value']],
    how='left',
)

In [None]:
df_vo

In [None]:
# Pct of relationship between vic - offender to intentional female homicides
# NOTE: the Intentional female homicides are from EUROSTAT - as is the vic-offender dataset
# df_vo['pct_fam_partner'] = round((df_vo['Value']/df_vo['Intentional female homicides'])*100,1)

In [None]:
# Rate per 100k of female population (the denominator is female_population,
# not female deaths), rounded to one decimal.
# Only rows with Relationship_type == 'Total' feed the numerator; pandas aligns
# the division on the index, so all other rows get NaN in the new column.
df_vo['vic_offender_relationship_per_100k'] = round((df_vo[df_vo['Relationship_type']=='Total']['Value']/df_vo['female_population'])*100000,1)

In [None]:
# Sorted with pct_fam_partner
# df_vo[(df_vo['pct_fam_partner']<=100)].sort_values(by='pct_fam_partner', ascending=False)

In [None]:
totals = df_vo[df_vo['Relationship_type']=='Total']

In [None]:
totals.head()

In [None]:
# Sorted with relationshp_per_100k
df_vo.sort_values(by='vic_offender_relationship_per_100k', ascending=False)[['Country','Year','vic_offender_relationship_per_100k']]

In [None]:
# mean by country
df_vo.groupby('Country')['vic_offender_relationship_per_100k'].mean().reset_index().sort_values(by='vic_offender_relationship_per_100k', ascending=False)

In [None]:
# Year-over-year change (%) of the summed 'Total' values, computed per country.
# pct_change is applied inside each Country group: run over the whole
# (Country, Year) series it would compare a country's first year with the
# previous country's last year. Infinite changes are shown as 100.
totals = (
    (
        totals.groupby(['Country', 'Year']).Value.sum()
        .groupby(level='Country')
        .pct_change()
        * 100
    )
    .round(1)
    .reset_index()
    .replace(np.inf, 100)
)
totals.head()

In [None]:
totals[totals.Year==2020].sort_values(by='Value', ascending=False).reset_index(drop=True)

In [None]:
df_vo[(df_vo.Year==2019) & (df_vo['Relationship_type']=='Intimate partner') & (df_vo['Value'].notna())].Country.unique()

In [None]:
df_vo[(df_vo.Year==2020) & (df_vo['Relationship_type']=='Intimate partner') & (df_vo['Value'].notna())].Country.unique()

In [None]:
df_vo[(df_vo.Year==2021) & (df_vo['Relationship_type']=='Intimate partner') & (df_vo['Value'].notna())].Country.unique()

In [None]:
#clean the dataset from nan values
# df_vo = df_vo.dropna(subset=['femicides','Value']).copy()

In [None]:
# 'Value' as a percentage of 'Intentional female homicides', one decimal.
# NOTE(review): the column name says "femicides" but the denominator is the
# 'Intentional female homicides' column - confirm this is intended.
df_vo['pct_of_value_to_femicides'] = round((df_vo['Value']/df_vo['Intentional female homicides'])*100,1)

In [None]:
df_vo.to_csv('victim_offender_relationship.csv', index=False)

In [None]:
df[df.Country=='Spain']

# Rape Eurostat

In [None]:
rape_vic.head()

In [None]:
rp = pd.merge(df[['Country','Year','female_population','Female deaths']], rape_vic[['Country','Year','ICCS - CRIME DEFINITION', 'UNIT', 'Value']])

In [None]:
rp = rp[(rp.Year>2011)&(rp.UNIT=='Number')].copy()

In [None]:
rp['per100k'] = round((rp['Value']/rp.female_population)*100000,1)

In [None]:
rp.head()

In [None]:
rp.to_csv('rp.csv',index=False)

In [None]:
rp[rp.Country=='Sweden']

# Perpetrators - Eurostat

In [None]:
perp = read(two)

In [None]:
perp = perp[(perp.Year>2011)&(perp.SEX=='Males') & (perp.UNIT=='Number')].copy()

In [None]:
dfp = pd.merge(df[['Country','Year','Population','female_population','Female deaths']], perp[['Country','Year','ICSS CRIME','LEGAL STATUS OF PERPETRATOR','Value']])

In [None]:
dfp.head(2)

In [None]:
# Rate per 100k, one decimal. The denominator is Population minus
# female_population, i.e. the non-female part of the population.
dfp['per_100k'] = round((dfp['Value']/(dfp['Population']-dfp['female_population']))*100000,1)

In [None]:
dfp.to_csv('dfp.csv',index=False)

In [None]:
dfp.rename(columns={'Value':'Value_perp'}, inplace=True)

In [None]:
# pd.merge(rp, dfp, how='left')

# Violence (physical, economical, psychological, sexual)

In [None]:
df

In [None]:
# Countries with the biggest increase in PHYSICAL VIOLENCE, years after 2019 --- COVID
(
    df[df['physical_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('physical_violence_pct_change', ascending=False)
    [['Country', 'Year', 'physical_violence_pct_change', 'physical_violence']]
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
# NOTE(review): negative or missing changes cannot be drawn as a marker size -
# confirm that showing only increases is intended.
ax = plt.scatter(data=df, y='Country', x='Year', s=df['physical_violence_pct_change']*3)
plt.title('% increase in physical violence by year')
plt.show()

In [None]:
# Countries with the biggest increase in PSYCHOLOGICAL VIOLENCE, years after 2019 --- COVID
(
    df[df['psychological_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('psychological_violence_pct_change', ascending=False)
    [['Country', 'Year', 'psychological_violence_pct_change', 'psychological_violence']]
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['psychological_violence_pct_change']*3)
plt.title('% increase in psychological violence by year')
plt.show()

In [None]:
# Countries with the biggest increase in ECONOMIC VIOLENCE, years after 2019 --- COVID
(
    df[df['economic_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('economic_violence_pct_change', ascending=False)
    [['Country', 'Year', 'economic_violence_pct_change', 'economic_violence']]
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['economic_violence_pct_change']*3)
plt.title('% increase in economic violence by year')
plt.show()

In [None]:
# Countries with the biggest increase in SEXUAL VIOLENCE, years after 2019 --- COVID
(
    df[df['sexual_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('sexual_violence_pct_change', ascending=False)
    [['Country', 'Year', 'sexual_violence_pct_change', 'sexual_violence']]
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['sexual_violence_pct_change']*3)
# Title typo fixed ("sexia;" -> "sexual").
plt.title('% increase in sexual violence by year')
plt.show()

In [None]:
# Countries with the biggest increase in INTENTIONAL FEMALE HOMICIDES, years after 2019 --- COVID
(
    df[df['Intentional female homicides_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('Intentional female homicides_pct_change', ascending=False)
    [['Country', 'Year', 'Intentional female homicides_pct_change', 'Intentional female homicides']]
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['Intentional female homicides_pct_change']*3)
plt.title('% increase in intentional female homicides by year')
plt.show()

In [None]:
# Countries with the biggest increase in VICTIMS OF INTIMATE VIOLENCE, years after 2019 --- COVID
(
    df[df['vic_intimate_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values(by='vic_intimate_violence_pct_change', ascending=False)
    [['Country', 'Year', 'vic_intimate_violence_pct_change']]
)


In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['vic_intimate_violence_pct_change']*3)
plt.title('% increase in VICTIMS OF INTIMATE VIOLENCE by year')
plt.show()

In [None]:
# Countries with the biggest increase in REPORTED OFFENCES OF INTIMATE VIOLENCE,
# years after 2019 --- COVID (all columns shown)
(
    df[df['reported_offences_int_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('reported_offences_int_violence_pct_change', ascending=False)
)

In [None]:
# All rows ranked by the change in reported intimate-violence offences, largest first
(
    df.sort_values(by='reported_offences_int_violence_pct_change', ascending=False)
    [['Country', 'Year', 'reported_offences_int_violence_pct_change']]
)

In [None]:
# Countries with the biggest increase in MEN HELD IN PRISON, years after 2019 --- COVID
(
    df[df['men_held_in_prison_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('men_held_in_prison_pct_change', ascending=False)
)

In [None]:
# Countries with the biggest increase in PROTECTION ORDERS, years after 2019 --- COVID
(
    df[df['protection_orders_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('protection_orders_pct_change', ascending=False)
)

In [None]:
# Countries with the biggest increase in MEN PROSECUTED, years after 2019 --- COVID
(
    df[df['men_prosecuted_for_int_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('men_prosecuted_for_int_violence_pct_change', ascending=False)
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['men_prosecuted_for_int_violence_pct_change']*3)
plt.title('% increase in men prosecuted for int violence by year')
plt.show()

In [None]:
# Countries with the biggest increase in MEN SENTENCED, years after 2019 --- COVID
(
    df[df['men_sentenced_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('men_sentenced_pct_change', ascending=False)
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['men_sentenced_pct_change']*3)
plt.title('% increase in men sentenced for int violence by year')
plt.show()

In [None]:
# Countries with the biggest increase in MEN PERPETRATORS, years after 2019 --- COVID
(
    df[df['men_perpetrators_int_violence_pct_change'].notna() & (df['Year'] > 2019)]
    .sort_values('men_perpetrators_int_violence_pct_change', ascending=False)
)

In [None]:
# Create the figure first: calling plt.figure() after plt.scatter() opens a
# new, empty figure, so the requested size never reached the plot.
plt.figure(figsize = (5,5))
# Marker size scales with the change (x3).
ax = plt.scatter(data=df, y='Country', x='Year', s=df['men_perpetrators_int_violence_pct_change']*3)
plt.title('% increase in men perpetrators for int violence by year')
plt.show()

In [None]:
# Every distinct column whose name ends in '_pct_change' - the variables
# available for comparing the countries
[column for column in set(df.columns.tolist()) if column.endswith('_pct_change')]

# Missing data

In [None]:
# Long format: the identifier columns stay as they are, every other column
# becomes an ('item', 'value') pair.
df_long = pd.melt(
    df,
    id_vars=['country_code', 'Country', 'Year', 'Population', 'female_population', 'Deaths', 'Female deaths'],
    var_name='item',
)

In [None]:
# Countries ranked by their total number of missing values, most first
df_long['missing_values'] = df_long.value.isna()
missing_values = (
    df_long.groupby(['Country'])
    .missing_values
    .value_counts(dropna=False)
    .to_frame()
    .unstack()
    .droplevel(0, axis=1)
    .reset_index()
    [['Country', True]]
    .sort_values(by=True, ascending=False)
    .reset_index(drop=True)
)
missing_values

In [None]:
# Missing values by country and year: one row per (Country, Year) with the
# number of present (False) and missing (True) values as columns
missing_per_year = (
    df_long.groupby(['Country', 'Year'])
    .missing_values
    .value_counts(dropna=False)
    .to_frame()
    .unstack()
    .droplevel(0, axis=1)
    .reset_index()
)

missing_per_year

In [None]:
# Per country: total present (False) and missing (True) values over all years
missing_per_country = pd.merge(
    missing_per_year.groupby('Country')[False].sum().reset_index(),
    missing_per_year.groupby('Country')[True].sum().reset_index(),
    how='right',
)

# Share of missing values in percent, one decimal
missing_per_country['pct_missing'] = (
    (missing_per_country[True] / (missing_per_country[True] + missing_per_country[False])) * 100
).round(1)

missing_per_country

In [None]:
# Missing values by country for the years after 2014.
# Step 1: present (False) / missing (True) counts per (Country, Year)
missing_per_year_2015 = (
    df_long[df_long['Year'] > 2014]
    .groupby(['Country', 'Year'])
    .missing_values
    .value_counts(dropna=False)
    .to_frame()
    .unstack()
    .droplevel(0, axis=1)
    .reset_index()
)
# Step 2: add the counts up per country
missing_per_year_2015 = pd.merge(
    missing_per_year_2015.groupby('Country')[False].sum().reset_index(),
    missing_per_year_2015.groupby('Country')[True].sum().reset_index(),
    how='right',
)

# Share of missing values in percent, one decimal
missing_per_year_2015['pct_missing'] = (
    (missing_per_year_2015[True] / (missing_per_year_2015[True] + missing_per_year_2015[False])) * 100
).round(1)

missing_per_year_2015

In [None]:
# Share of missing values per (Country, Year) in percent, one decimal
missing_per_year['pct_missing'] = (
    (missing_per_year[True] / (missing_per_year[True] + missing_per_year[False])) * 100
).round(1)

In [None]:
# Missing data per country and year as a heat map of pct_missing (0-100).
# With the 'RdBu_r' colormap, cells near 100 (most data missing) are drawn red
# and cells near 0 blue.
dfw = pd.pivot(missing_per_year, index='Country', columns='Year', values='pct_missing')
# Create the figure first: calling plt.figure() after plotting opens a new,
# empty figure, so the requested size never reached the heat map.
plt.figure(figsize = (5,5))
plt.pcolor(dfw, cmap='RdBu_r', vmin=0, vmax=100)
# Ticks sit at cell centres (0.5, 1.5, ...) and are labelled with country / year
plt.yticks(np.arange(0.5, len(dfw.index), 1), dfw.index)
plt.xticks(np.arange(0.5, len(dfw.columns), 1), dfw.columns)
plt.show()

In [None]:
missing_per_year

In [None]:
def check_data(value):
    '''
    Return the value unchanged, or the placeholder '(NO DATA)' when it is missing.
    value -> a scalar; NaN and None both count as missing
    '''
    # pd.isna is a real boolean test (no bitwise ~ on a bool) and also accepts None
    return '(NO DATA)' if pd.isna(value) else value

def perpetrators(country, year):
    '''
    Build a one-paragraph report on prosecuted male perpetrators for one country and year.
    It reads the columns pct_sentenced_from_prosecuted, pct_prison_from_sentenced and
    pct_prison_from_prosecuted from the global dataframe df; missing values are
    rendered through check_data().
    country -> the country to report on (value of df['Country'])
    year -> the year to report on (value of df['Year'])

    Raises ValueError (from .item()) when df does not hold exactly one row for
    the given country and year.
    '''
    # Select the row once instead of filtering the dataframe for every column.
    row = df.loc[(df['Country'] == country) & (df['Year'] == year)]
    pct_sentenced_from_prosecuted = row['pct_sentenced_from_prosecuted'].item()
    pct_prison_from_sentenced = row['pct_prison_from_sentenced'].item()
    pct_prison_from_prosecuted = row['pct_prison_from_prosecuted'].item()

    # Adjacent string literals are joined without the indentation whitespace
    # that a backslash continuation inside the string would embed in the text.
    report = (
        f"In {country}, in {year}, {check_data(pct_sentenced_from_prosecuted)}% "
        "of the male perpetrators who were prosecuted were convicted, "
        f"of whom {check_data(pct_prison_from_sentenced)}% were jailed. "
        f"This means that {check_data(pct_prison_from_prosecuted)}% "
        "of the male perpetrators who were prosecuted ended up in jail."
    )

    return report
    

In [None]:
# Text results generator: one report per country and year that has a
# pct_prison_from_prosecuted value. We exclude Croatia because the data are skewed.

years = range(2010, 2022, 1)
# unique() keeps first-appearance order, so the reports come out in the same
# order on every run (iterating a set of strings does not guarantee that).
countries = df.Country.unique().tolist()
results = []
for year in years:
    for country in countries:
        if country == 'Croatia':
            continue
        try:
            a = df.loc[(df.Year == year) & (df.Country == country)].pct_prison_from_prosecuted.item()
        except ValueError:
            # .item() needs exactly one row; skip country/years without one
            continue
        if not np.isnan(a):
            results.append({
                'Country': country,
                'Year': year,
                'Text': perpetrators(country, year),
            })

results

In [None]:
# Ranking by the percentage of perpetrators who end up in prison (ascending).
df[['Country', 'Year', 'pct_prison_from_prosecuted']].sort_values(by='pct_prison_from_prosecuted')

In [None]:
# Bubble chart per country and year; marker size is pct_prison_from_prosecuted * 3.
# The figure is created first so that figsize applies to this plot instead of
# opening an extra empty figure after it.
plt.figure(figsize=(5, 5))
ax = plt.scatter(data=df, y='Country', x='Year', s=df['pct_prison_from_prosecuted'] * 3)
plt.title('% of prosecuted that end up in jail')
plt.show()

In [None]:
# Ranking by the percentage of perpetrators who are brought to justice (ascending).
df[['Country', 'Year', 'pct_sentenced_from_prosecuted']].sort_values(by='pct_sentenced_from_prosecuted')

In [None]:
# Bubble chart per country and year; marker size is pct_sentenced_from_prosecuted * 3.
# The figure is created first so that figsize applies to this plot instead of
# opening an extra empty figure after it.
plt.figure(figsize=(5, 5))
ax = plt.scatter(data=df, y='Country', x='Year', s=df['pct_sentenced_from_prosecuted'] * 3)
plt.title('% of prosecuted that are sentenced')
plt.show()

# AUTO GENERATED TEXT

In [None]:
def extract_info(df, location, value):
    '''
    Return one cell of a dataframe, addressed by row position and column name.
    df -> your dataframe
    location -> positional (iloc) index of the row
    value -> the column name to read from that row
    '''
    row = df.iloc[location]
    return row[value]

In [None]:
",    ".join(df.columns.tolist())

In [None]:
# Sorted list of the years that have at least one femicides_pct_change value.
y = sorted(df.loc[df['femicides_pct_change'].notna(), 'Year'].unique())
y

In [None]:
# Sorted list of the years that have at least one femicides value.
yf = sorted(df.loc[df['femicides'].notna(), 'Year'].unique())
yf

In [None]:
# How many distinct countries have at least one femicides value.
c = len(df.loc[df['femicides'].notna(), 'Country'].unique())

In [None]:
# Examine for which years we have more data: print, for every year in y,
# the number of rows that have a femicides_pct_change value.
for item in y:
    # a dedicated name, so the `results` list built earlier is not overwritten
    n_rows = len(df[(df['Year'] == item) & (df['femicides_pct_change'].notna())])
    print(item, " ", n_rows)

In [None]:
# Count, per country, how many of the years 2016-2018 have a
# femicides_pct_change value (index = country, single column = count).
filtered_countries = (
    df.loc[
        (df['Year'] > 2015) & (df['Year'] < 2019) & (df['femicides_pct_change'].notna()),
        'Country',
    ]
    .value_counts()
    .to_frame()
)


In [None]:
# Comparable countries for the years 2016-18: those with a value in all three years.
# The single count column is addressed by position because its label depends on
# the pandas version ('Country' before 2.0, 'count' from 2.0 on).
comparable_countries = filtered_countries[filtered_countries.iloc[:, 0] > 2].index.tolist()

In [None]:
# Countries that have a femicides value for 2019.
df[(df.Year==2019) & (df.femicides.notna())].Country.unique()

In [None]:
# Countries that have a femicides value for 2020.
df[(df.Year==2020) & (df.femicides.notna())].Country.unique()

In [None]:
# Countries that have a femicides value for 2021.
df[(df.Year==2021) & (df.femicides.notna())].Country.unique()

### comparing femicides between a selected list of countries (EIGE)

In [None]:
# Countries which can be compared in femicides according to EIGE. We exclude Malta because
# it distorts the data due to the small number of recorded cases.
# The membership check keeps the cell re-runnable: list.remove() raises
# ValueError once Malta is already gone.
if 'Malta' in comparable_femicides_countries:
    comparable_femicides_countries.remove('Malta')
comparable_femicides_countries

In [None]:
# Boolean flag: is the row's country in the comparable (EIGE) list?
df['comparable_femicides'] = df['Country'].isin(comparable_femicides_countries)

In [None]:
# Display the full dataframe, now including the comparable_femicides flag.
df

In [None]:
# Keep only rows of the comparable (EIGE) countries that have a
# femicides_pct_change value, as an independent copy with a fresh index.
comparable_femicides = (
    df.loc[
        df['Country'].isin(comparable_femicides_countries)
        & df['femicides_pct_change'].notna()
    ]
    .reset_index(drop=True)
    .copy()
)



In [None]:
# Reduce the table to Country / Year / femicides_per_100000 and order it by
# femicides_per_100000, highest first, with a fresh index.
comparable_femicides = (
    comparable_femicides[['Country', 'Year', 'femicides_per_100000']]
    .sort_values(by='femicides_per_100000', ascending=False)
    .reset_index(drop=True)
)

comparable_femicides

In [None]:
# Wide table of femicides_pct_change: one row per year, one column per country.
df_wide = df.pivot(index='Year', columns='Country', values='femicides_pct_change')
df_wide.head(2)

In [None]:

# Line chart of femicides_pct_change, one line per country; the legend is
# placed outside the axes on the right.
# (The extra plt.figure(figsize=(1,1)) call was dropped: it only opened a
# second, empty figure after the chart had been drawn.)
ax = df_wide.plot(kind='line', figsize=(5, 3))
ax.set_title("pct change in femicides in all countries")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)

plt.show()

In [None]:
# Sentence listing the countries whose femicide data are comparable according to EIGE.
# It joins the EIGE list (comparable_femicides_countries), not comparable_countries,
# which holds the countries with data for every year 2016-18.
f'According to EIGE we can compare the femicides data between these countries {", ".join(comparable_femicides_countries)}'

In [None]:
# Strip plot of the comparable (EIGE) countries: year on x, country on y,
# point colour taken from femicides_per_100000.
plt.figure(figsize=(5, 5))
ax = sns.stripplot(
    data=comparable_femicides,
    x="Year",
    y="Country",
    hue="femicides_per_100000",
    dodge=False,
    jitter=True,
    s=10,
    marker="o",
    linewidth=.3,
    alpha=.80,
    native_scale=True,
)
# Put the legend outside the axes, to the right of the plot.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
ax.set_title("Femicides in comparable (EIGE) countries per 100k female pop")
# NOTE(review): this colormap is created after the plot and is not passed to it.
palette = sns.color_palette("flare", as_cmap=True)


In [None]:
# Bubble chart of the comparable countries; marker size is
# femicides_per_100000 * 500 (stored in viz_per_100k).
comparable_femicides['viz_per_100k'] = comparable_femicides['femicides_per_100000'] * 500
# The figure is created first so that figsize applies to this plot instead of
# opening an extra empty figure after it.
plt.figure(figsize=(7, 7))
ax = plt.scatter(data=comparable_femicides, y='Country', x='Year', s='viz_per_100k')
plt.margins(.1)
plt.show()

In [None]:
# Display the comparable-countries table, including the viz_per_100k column.
comparable_femicides

In [None]:
# Comparable countries that have at least one row in the years 2019-2021.
countries_to_compare_before_after_covid = comparable_femicides.loc[
    (comparable_femicides['Year'] > 2018) & (comparable_femicides['Year'] < 2022),
    'Country',
].unique()

In [None]:
# Display the countries selected for the 2019-2021 comparison.
countries_to_compare_before_after_covid

In [None]:
# Rows of the selected countries for the years 2019, 2020 and 2021.
cmp = df.loc[
    df['Country'].isin(countries_to_compare_before_after_covid)
    & df['Year'].isin([2019, 2020, 2021])
]


In [None]:
# Keep the femicide columns, order by country and year, drop rows with any
# missing value and renumber the index.
cmp = (
    cmp[['Country', 'Year', 'femicides', 'femicides_per_100000', 'Female deaths', 'femicides_pct_change']]
    .sort_values(by=['Country', 'Year'])
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Order the comparison table by femicides_pct_change, largest first
# (sort_values already returns a new dataframe).
cmp = cmp.sort_values(by='femicides_pct_change', ascending=False)

# Details for data

In [None]:
# Intro sentence: how many countries, which years, and the list of countries.
countries = df.Country.unique()
# Adjacent string literals keep a space after every sentence; the former
# backslash continuation glued "2015." to "The countries".
basic_txt = (
    f"We examine {len(countries)} countries. "
    f"We have data for the years {years[0]} to {years[-1]}. "
    "But we'll examine the data from 2015. "
    f"The countries are: {', '.join(countries)}."
)
auto_text.append(basic_txt)
basic_txt

In [None]:
# Percentage of data missing per country over all years, least incomplete first.
missing_per_country.sort_values(by='pct_missing')

In [None]:
# Percentage of data missing per country from 2015 onwards, least incomplete first.
missing_per_year_2015.sort_values(by='pct_missing')

In [None]:
# main indicators: femicides & types of violence & intentional homicide by intimate partner & family

In [None]:
# function that returns the word "increase" or "decrease" for a given number
def inc_dec(data1):
    '''
    Return the word that describes the direction of a numeric change.
    data1 -> the change (e.g. a pct_change value)

    Returns 'increase' for a positive number, 'decrease' for a negative number
    and 'change' for exactly zero. Anything that is not a usable number
    (a string such as "no data", a boolean, None, NaN) returns an empty string.
    '''
    numeric_types = (int, float, np.integer, np.floating)
    # booleans are ints in Python; they are not treated as a change value
    if isinstance(data1, (bool, np.bool_)) or not isinstance(data1, numeric_types):
        return ""
    # NaN compares False to everything and used to be reported as a decrease
    if np.isnan(data1):
        return ""
    if data1 > 0:
        return "increase"
    if data1 < 0:
        return "decrease"
    # zero is neither an increase nor a decrease
    return "change"

In [None]:
# function that calculates the pct_change of a value for one country between a base year and a given year
def compare_to_2019(df, country, value, year, base_year=2019):
    '''
    This function will return the pct_change between the value in base_year and the value in your given year
    df -> your dataframe
    country -> the country you want to examine (that is included in your dataframe)
    value -> the column name of your dataframe that you want to extract your value from
    year -> the year you want your value to reflect
    base_year -> the year you compare against (defaults to 2019)

    e.g. compare_to_2019(df, 'Greece','femicides',2021) will return the pct_change of
    femicides in 2021 compared to 2019 (before the pandemic)

    Returns the change in percent rounded to one decimal, or the string "no data"
    when the value cannot be computed.
    '''
    try:
        before = df[(df['Year'] == base_year) & (df['Country'] == country)][value].item()
        after = df[(df['Year'] == year) & (df['Country'] == country)][value].item()
        return round(((after - before) / before) * 100, 1)
    except (KeyError, ValueError, TypeError, ZeroDivisionError):
        # KeyError: unknown column; ValueError: not exactly one matching row;
        # TypeError: non-numeric cell; ZeroDivisionError: base value is 0
        return "no data"

In [None]:
# function that gets the absolute number of a value for one country and year
def abs_number(df, country, value, year):
    '''
    This function will return the value of a row of your dataframe.
    df -> your dataframe
    country -> the country you want to examine (that is included in your dataframe)
    value -> the column name of your dataframe that you want to extract your value from
    year -> the year you want your value to reflect
    e.g. abs_number(df, 'Greece','femicides',2019) will return the number of femicides in Greece in 2019

    Returns the string "no data" when the column does not exist or when there
    is not exactly one row for the country and year.
    '''
    try:
        return df[(df['Year'] == year) & (df['Country'] == country)][value].item()
    except (KeyError, ValueError):
        # KeyError: unknown column; ValueError: .item() needs exactly one row
        return "no data"

In [None]:
def compare_to_2019_text(dataframe, list_of_countries, value, base_year, years=(2020, 2021)):
    '''
    This function compares data to a base year. In order to work, you need to load the functions
    - compare_to_2019()
    - abs_number()
    - inc_dec()
    e.g. It will compare the 'femicides' of 2020 to femicides of 2019, or femicides
    of 2021 to femicides of 2019.
    dataframe -> your dataframe
    list_of_countries -> list of the countries that exist in your dataframe
    value -> the column name that you want to compare to base year
    base_year -> the year every other year is compared against
    years -> the years to compare with base_year (defaults to 2020 and 2021)

    Returns one sentence per country and year, joined with newlines. Country/year
    pairs for which no change can be computed are left out.
    '''
    results_compare_to_2019 = []
    for country in list_of_countries:
        for year in years:
            number = compare_to_2019(dataframe, country, value, year, base_year)
            # compare_to_2019 returns the string "no data" on failure; NaN means a value was missing
            if isinstance(number, float) and not np.isnan(number):
                # the base value is read for base_year (it used to be read for
                # 2019 even when another base year was requested)
                result_countries = f'{country} in {year} had a {number}% ({abs_number(dataframe, country, value, year)}) {inc_dec(number)} in {value} compared to {base_year} ({abs_number(dataframe, country, value, base_year)}).'
                results_compare_to_2019.append(result_countries)
    return "\n".join(results_compare_to_2019)


In [None]:
# Calling compare_to_2019_text to build the results only for the comparable (EIGE) countries
# (femicides compared to 2019); the text is also appended to auto_text.
fem_per_country_comp_2019_txt = compare_to_2019_text(cmp, countries_to_compare_before_after_covid, 'femicides', 2019)
auto_text.append(fem_per_country_comp_2019_txt)
fem_per_country_comp_2019_txt

In [None]:
# Calling the function to print results for all the countries (femicides compared to 2019)
all_countries_fem_comp_2019_txt = compare_to_2019_text(df, df.Country.unique(), 'femicides', 2019)

print(all_countries_fem_comp_2019_txt)

In [None]:


# Same comparison for reported_offences_int_violence, with 2018 as the base year.
# NOTE(review): this reuses (and overwrites) the variable that held the femicides text.
all_countries_fem_comp_2019_txt = compare_to_2019_text(df, df.Country.unique(), 'reported_offences_int_violence', 2018)

print(all_countries_fem_comp_2019_txt)

In [None]:
# reported_offences_int_violence per year for Slovenia.
df[df.Country=='Slovenia'][['Year','reported_offences_int_violence']]

In [None]:
# List the column names of the dataframe.
df.columns

In [None]:
# 'Intentional female homicides' of 2020 and 2021 compared to 2019, for all countries.
print(compare_to_2019_text(df, df.Country.unique(), 'Intentional female homicides', 2019))

In [None]:
# men_sentenced of 2020 and 2021 compared to 2019, for all countries.
print(compare_to_2019_text(df, df.Country.unique(), 'men_sentenced', 2019))

In [None]:
# Value of rape_victims for Greece in 2020 ("no data" when it cannot be read).
abs_number(df, 'Greece', 'rape_victims', 2020)

## How many femicides have been recorded (total number)

In [None]:
# Total number of recorded femicides over all countries and years.
total_femicides = int(df['femicides'].sum())
# "More than X" must be a lower bound, so round DOWN to the hundred;
# round(x, -2) rounds to the nearest hundred and can overstate the total.
total_num_femicides_txt = (
    f"We have data on femicides (any year) in {c} countries in Europe from {yf[0]} to {yf[-1]}. "
    f"More than {total_femicides // 100 * 100} ({total_femicides}) femicides have been recorded, "
    "according to our analysis. This number underrepresents reality due to the lack of data. "
)
total_num_femicides_txt

## Which country had the biggest increase in femicides compared to the previous year?

In [None]:
# Femicide rows ordered by femicides_pct_change, largest first;
# infinite values are replaced by 100 before sorting.
country_with_biggest_increase_in_femicides = (
    df[['Country', 'Year', 'femicides', 'femicides_pct_change']]
    .replace(np.inf, 100)
    .sort_values(by='femicides_pct_change', ascending=False)
)


In [None]:
# Sentence about the four rows with the largest femicides_pct_change.
# The top four rows are read once and reused in the text below.
_top = [country_with_biggest_increase_in_femicides.iloc[rank] for rank in range(4)]
biggest_increase_fem_txt = (
    f"The country with the biggest increase in femicides was {_top[0].Country} "
    f"with {_top[0].femicides_pct_change}% increase in "
    f"{_top[0].Year} compared to "
    f"{_top[0].Year-1}. "
    f"{_top[1].Country} in {_top[1].Year} "
    f"recorded a {_top[1].femicides_pct_change}% increase, "
    f"while {_top[2].Country} and {_top[3].Country} had a "
    f"{_top[2].femicides_pct_change}% and "
    f"{_top[3].femicides_pct_change}% "
    f"increase in {_top[2].Year} "
    f"and {_top[3].Year}, respectively."
)

biggest_increase_fem_txt

In [None]:
# Display the ten rows with the largest femicides_pct_change.
country_with_biggest_increase_in_femicides.head(10)

In [None]:
# Sentence about the four highest femicides_per_100000 rows among the comparable countries
# (comparable_femicides is sorted by that column, highest first).
# Adjacent string literals keep a space after every sentence; the former
# backslash continuations glued "pop." to the following sentence.
comparable_fem_countries_txt = (
    "Among the countries that we can compare their femicides data "
    "(excluding Malta which records one or two femicides per year), "
    f"{extract_info(comparable_femicides, 0, 'Country')} in {extract_info(comparable_femicides, 0, 'Year')} "
    f"had the highest rate of femicides, with {extract_info(comparable_femicides, 0, 'femicides_per_100000')} femicides per 100k female pop. "
    f"It is followed by {extract_info(comparable_femicides, 1, 'Country')} which in {extract_info(comparable_femicides, 1, 'Year')} "
    f"recorded {extract_info(comparable_femicides, 1, 'femicides_per_100000')} femicides per 100k female pop. "
    f"{extract_info(comparable_femicides, 2, 'Country')} and {extract_info(comparable_femicides, 3, 'Country')} "
    f"follow with {extract_info(comparable_femicides, 2, 'femicides_per_100000')} and {extract_info(comparable_femicides, 3, 'femicides_per_100000')} "
    f"in the years {extract_info(comparable_femicides, 2, 'Year')} and {extract_info(comparable_femicides, 3, 'Year')}, respectively."
)

comparable_fem_countries_txt

In [None]:
# Sentence about the three rows of cmp with the largest femicides_pct_change
# (cmp is sorted by that column, largest first).
# Fixes: the third sentence now reports the year of the third row (it used the
# year of the first row), and "in" is placed before the year in sentences two and three.
covid_comparable_fem_countries_txt = (
    "Among the countries with comparable femicide data, we have data for before and after the pandemic "
    f"for {len(countries_to_compare_before_after_covid)}, which are {', '.join(countries_to_compare_before_after_covid)}. "
    f"Of those {extract_info(cmp,0,'Country')} has the biggest increase with {extract_info(cmp,0,'femicides_pct_change')}% increase in "
    f"{extract_info(cmp,0,'Year')}, compared to {extract_info(cmp,0,'Year')-1}, the year before the pandemic. "
    f"{extract_info(cmp,1,'Country')} follows with {extract_info(cmp,1,'femicides_pct_change')}% {inc_dec(extract_info(cmp,1,'femicides_pct_change'))} "
    f"in {extract_info(cmp,1,'Year')}, compared to {extract_info(cmp,1,'Year')-1}. "
    f"The third country in the list is {extract_info(cmp,2,'Country')} with {extract_info(cmp,2,'femicides_pct_change')}% {inc_dec(extract_info(cmp,2,'femicides_pct_change'))} "
    f"in {extract_info(cmp,2,'Year')}, compared to {extract_info(cmp,2,'Year')-1}."
)

covid_comparable_fem_countries_txt

## Out of 10 homicides of women (homicide_total), how many were femicides?

In [None]:
# For each country, over all years: out of every `measure` homicides of women,
# how many were femicides. Only rows that have both values (femicides and
# homicide_total) are used.
measure = 10
per_country_all_years = df.loc[
    df.femicides.notna() & df.homicide_total.notna(),
    ['Country', 'Year', 'femicides', 'homicide_total'],
]
# Totals per country.
per_country_all_years = (
    per_country_all_years.groupby('Country')[['femicides', 'homicide_total']]
    .sum()
    .reset_index()
)
# Femicides per `measure` homicides, one decimal (the column keeps its "pct" name).
per_country_all_years['pct of femicides to female homicide victims'] = (
    per_country_all_years['femicides'] / per_country_all_years['homicide_total'] * measure
).round(1)
per_country_all_years = per_country_all_years.sort_values(
    by='pct of femicides to female homicide victims', ascending=False
)

# Names of the three highest-ranked countries ...
country_one, country_two, country_three = (
    extract_info(per_country_all_years, rank, 'Country') for rank in range(3)
)

# ... and their rates rounded to whole numbers.
one_fem, two_fem, three_fem = (
    round(extract_info(per_country_all_years, rank, 'pct of femicides to female homicide victims'))
    for rank in range(3)
)

out_of_10_homicides_txt = (
    f"It is estimated that {round((per_country_all_years['femicides'].sum()/per_country_all_years['homicide_total'].sum())*measure)} out of {measure} "
    "homicides of women are recorded as femicides. "
    "According to the data we have collected (link methodology), "
    f"in {country_one}, almost {one_fem} out of {measure} homicides against women were femicides. "
    f"In {country_two}, {two_fem} out of {measure} victims of homicide in total were femicides. "
    f"In {country_three}, {three_fem} out of {measure} victims of homicide in total were femicides."
)

out_of_10_homicides_txt

In [None]:
# Write the dataframe to master.csv without the index column.
# NOTE(review): 'fem_to_hom%' is only added in the next cell — confirm it is meant to be left out of this export.
df.to_csv('master.csv', index=False)

In [None]:
# Femicides as a percentage of homicide_total per row, one decimal.
df['fem_to_hom%'] = (df['femicides'] / df['homicide_total'] * 100).round(1)

In [None]:
# Mean fem_to_hom% per country (one decimal), highest first.
(
    df.groupby('Country')['fem_to_hom%']
    .mean()
    .round(1)
    .to_frame()
    .reset_index()
    .sort_values(by='fem_to_hom%', ascending=False)
)

In [None]:
# Country/year rows ranked by fem_to_hom%, highest first.
df[['Country', 'Year', 'fem_to_hom%']].sort_values(by='fem_to_hom%', ascending=False)

In [None]:
# List the column names of the dataframe (now including fem_to_hom%).
df.columns

In [None]:
# Rows ranked by 'pct of femicides to female homicide victims', highest first.
# NOTE(review): in this part of the notebook that column is only created on
# per_country_all_years — confirm df has it too, otherwise this raises KeyError.
df.sort_values(by=['pct of femicides to female homicide victims'], ascending=False)[['Country','Year','femicides','pct of femicides to female homicide victims']]

In [None]:
# Unpivot dfp with Country as the identifier and Year as the melted column.
# NOTE(review): dfp is not defined in this part of the notebook — confirm where it comes from.
pd.melt(dfp, id_vars='Country',value_vars='Year')

In [None]:
# Display all rows for Slovenia.
df[df.Country=='Slovenia']