In [1]:
# import libraries
from datetime import date
import numpy as np
import requests
import re
import pandas as pd
import os
import io
from datetime import datetime, timedelta
import functions # specific module for additional functions for this code
from glob import glob

In [2]:
# Download the Excel export from the sesam.search.admin.ch search page and load it
# straight into a DataFrame, without writing the workbook to disk.
url = 'https://www.sesam.search.admin.ch/sesam-search-web/pages/search.xhtml?Applikations-Version=1.4.0-92&lang=it&nameNamensteile=&volltextsuche=&sanktionsprogrammId=&adressatTyp=&action=generateExcelAction'
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=60)
# Stop here on an HTTP error (4xx/5xx) instead of handing an error page to read_excel,
# which would otherwise fail with a confusing "not a valid Excel file" style message.
response.raise_for_status()
file_content = io.BytesIO(response.content)  # in-memory stream wrapping the downloaded xlsx bytes
df = pd.read_excel(file_content)

In [4]:
# A single 'Nationality' cell can list several countries for the same SSID, separated
# by Windows line breaks or by ';'. Build a long table with one (SSID, Nationality)
# row per listed country.
#
# The work is done on a new frame produced by the chain below rather than by assigning
# a column onto a slice of `df`, which triggers pandas' SettingWithCopyWarning.
df_countries = (
    df[['SSID', 'Nationality']]
    .assign(
        # normalise '\r\n' to ';' so there is a single separator, then split into lists
        Nationality=lambda frame: frame['Nationality']
        .str.replace('\r\n', ';', regex=False)
        .str.split(';')
    )
    .explode('Nationality')            # one row per list element
    .dropna(subset=['Nationality'])    # SSIDs without any nationality contribute no rows
    .astype({'SSID': int})
    .reset_index(drop=True)
)
# NOTE(review): the split pieces are not whitespace-stripped; if the source file ever
# uses '; ' as separator the entries will carry a leading space - confirm against the data.
df_countries = df_countries[['SSID', 'Nationality']]

In [5]:
# Build the analysis table: the sanction attributes of every SSID joined with the
# exploded nationalities, renamed to the common schema and tagged with the issuing body.
df_ch = df[['SSID', 'Sanctions', 'Target type', 'Listed']]
new_names = {
    'SSID': 'sanctioned_id',
    'Listed': 'sanction_listing_date',
    'Target type': 'sanctioned_type',
    'Sanctions': 'sanction_text',
    'Nationality': 'sanctioned_country',
}

# Outer join so that SSIDs without any nationality row are kept (with a missing country).
df_ch_analysis = (
    pd.merge(df_ch, df_countries[['SSID', 'Nationality']], on='SSID', how='outer')
    .rename(columns=new_names)
    .assign(sanction_body='CH')
)
# Presumably maps a country description to its ISO3 code; see the project-local
# `functions` module for the exact behaviour - TODO confirm.
df_ch_analysis["sanctioned_country_iso3"] = df_ch_analysis["sanctioned_country"].apply(functions.descr_to_iso3)

# Some country names in the file are not valid inputs for the lookup function,
# so they are patched to their codes by hand. Applied in order, as literal text.
iso3_fixes = {
    'Congo DR': 'COD',
    'DPR Korea': 'PRK',
    'Soviet Union': 'SUN',
    'Palestinian territory': 'PSE',
}
# Standardise the target types to the labels used for the OFAC data.
type_labels = {
    'PERSON': 'Individual',
    'ORGANISATION': 'Entity',
    'SCHIFF': 'Ship',
}
for column, replacements in (
    ('sanctioned_country_iso3', iso3_fixes),
    ('sanctioned_type', type_labels),
):
    for old_text, new_text in replacements.items():
        # regex=False: plain substring replacement, independent of the pandas version
        # (the default of `regex` changed from True to False in pandas 2.0).
        df_ch_analysis[column] = df_ch_analysis[column].str.replace(old_text, new_text, regex=False)

df_ch_analysis['sanction_listing_date'] = pd.to_datetime(df_ch_analysis['sanction_listing_date'])

In [None]:
# Save the DataFrame as a semicolon-separated CSV.
# In Google Colab the file is written to the mounted Google Drive; in any other
# environment (where the google.colab package is not installed) it is written to the
# current working directory instead of failing on the import.
try:
    from google.colab import drive
except ImportError:
    output_path = 'df_ch_analysis.csv'
else:
    drive.mount('/content/drive')
    output_path = '/content/drive/My Drive/df_ch_analysis.csv'
df_ch_analysis.to_csv(output_path, sep=';', index=False)