In [None]:
# import libraries
from datetime import date
import numpy as np
import requests
import pandas as pd
import re
import os
import io
import json
from datetime import datetime, timedelta
import time
import functions # specific module for additional functions for this code
from glob import glob
# widen the console representation so wide DataFrames are not wrapped when displayed
pd.options.display.width = 2000

In [None]:
# The two endpoints below serve two different but complementary ';'-separated files.
# pandas can read each response straight from its URL, so no manual download/parsing is needed.
url1 = (
    'https://webgate.ec.europa.eu/fsd/fsf/public/files/'
    'csvFullSanctionsList/content?token=dG9rZW4tMjAxNw'
)
url2 = (
    'https://webgate.ec.europa.eu/fsd/fsf/public/files/'
    'csvFullSanctionsList_1_1/content?token=dG9rZW4tMjAxNw'
)

df_1 = pd.read_csv(url1, sep=';')
# low_memory=False makes pandas read the file in one pass instead of chunks
df_2 = pd.read_csv(url2, sep=';', low_memory=False)

In [None]:
# Keep only the columns this notebook needs from each file.
# For the second file, 'listing_date' is first computed row by row with
# 'functions.replace_blanks', a helper from the local 'functions' module imported
# at the top of the notebook (open that module and read its comments for details).
df_2['listing_date'] = df_2.apply(functions.replace_blanks, axis=1)
df_2 = df_2.loc[:, [
    'Entity_LogicalId',
    'listing_date',
    'Entity_SubjectType',
    'Entity_Regulation_Programme',
    'NameAlias_WholeName',
    'Entity_Regulation_NumberTitle',
    'Address_CountryIso2Code',
]]

df_1 = df_1.loc[:, [
    'Entity_logical_id',
    'Leba_publication_date',
    'Programme',
    'Naal_wholename',
    'Subject_type',
    'Leba_numtitle',
    'Addr_country',
]]

In [None]:
# Standardize the column names so both files share one schema and line up when stacked:
# - everything that refers to the single sanctioned subject gets the 'sanctioned_' prefix
# - everything that refers to the sanction itself gets the 'sanction_' prefix
new_names1 = {
    'Entity_logical_id': 'sanctioned_id',
    'Leba_publication_date': 'sanction_listing_date',
    'Subject_type': 'sanctioned_type',
    'Programme': 'sanction_programme',
    'Leba_numtitle': 'sanction_regulation',
    'Naal_wholename': 'sanctioned_aliases',
    'Addr_country': 'sanctioned_country_iso3'
}

df_1 = df_1.rename(columns=new_names1)

# The targets here must match the names used for df_1, otherwise the rows of the
# second file end up in separate columns after the concat and show up as NaN in
# 'sanctioned_id' / 'sanction_listing_date'.
# The date column to rename is 'listing_date': the column-selection cell keeps that
# derived column and drops 'Entity_DesignationDate', and DataFrame.rename silently
# ignores keys that are not present.
new_names2 = {
    'Entity_LogicalId': 'sanctioned_id',
    'listing_date': 'sanction_listing_date',
    'Entity_SubjectType': 'sanctioned_type',
    'Entity_Regulation_Programme': 'sanction_programme',
    'Entity_Regulation_NumberTitle': 'sanction_regulation',
    'NameAlias_WholeName': 'sanctioned_aliases',
    'Address_CountryIso2Code': 'sanctioned_country_iso2'
}

df_2 = df_2.rename(columns=new_names2)

In [None]:
# Build the final DataFrame for the analysis: stack the two files and drop exact duplicate rows
df_eu = pd.concat([df_1, df_2], ignore_index=True)
df_eu = df_eu.drop_duplicates()

# the source DataFrames are not needed after the concat; release them
del df_1
del df_2

# A little bit of data manipulation: the EU splits each sanction over two columns
# (programme and regulation), so join them into one string per sanction.
# NOTE(review): if either part is missing, the '+' concatenation yields NaN for
# 'sanction_text' - confirm that this is the desired outcome.
df_eu['sanctioned_aliases'] = df_eu['sanctioned_aliases'].fillna('no eu alias')
df_eu['sanction_text'] = df_eu['sanction_programme']+' - '+df_eu['sanction_regulation']

# Select the columns needed for the analysis and track the sanctioning body.
# .copy() makes the selection an independent DataFrame, so adding 'sanction_body'
# below does not trigger pandas' SettingWithCopyWarning.
df_eu_analysis = df_eu[[
    'sanctioned_id',
    'sanctioned_country_iso2',
    'sanctioned_country_iso3',
    'sanction_text',
    'sanctioned_type',
    'sanction_listing_date',
]].copy()
df_eu_analysis['sanction_body'] = 'EU'
del df_eu

In [None]:
# Optional: save the DataFrame to your Google Drive when running on Google Colab.
# 'google.colab' can only be imported inside Colab, so the import is guarded: in any
# other environment this cell is skipped instead of failing with ImportError.
# (The DataFrame can also be saved to a local directory.)
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    drive.mount('/content/drive')
    df_eu_analysis.to_csv('/content/drive/My Drive/df_eu_analysis.csv', sep=';', index=False)
else:
    print('google.colab is not available: skipping the Google Drive export')

In [None]:
# Save the DataFrame to a local directory as a ';'-separated CSV without the index.
# The directory defaults to the original hard-coded location; set the
# EU_SANCTIONS_OUTPUT_DIR environment variable to write somewhere else without
# editing the notebook. The directory must already exist.
output_dir = os.environ.get('EU_SANCTIONS_OUTPUT_DIR', 'C:/Users/valer/OneDrive/Desktop/python/input')
df_eu_analysis.to_csv(f'{output_dir}/df_eu_analysis.csv', sep=';', index=False)