In [194]:

class peace():
    """
    Scrape Global Peace Index (GPI) scores from Wikipedia into a DataFrame.

    Adapted from https://www.kaggle.com/kretes/gpi2008-2016

    Global Peace Index (GPI) measures the relative position of nations' and regions' peacefulness.
    The GPI ranks 163 independent states and territories (99.7 per cent of the world’s population)
    according to their levels of peacefulness. In the past decade, the GPI has presented trends of
    increased global violence and less peacefulness.

    A lower score indicates a more peaceful country.

    Calling ``peace()`` does not create a ``peace`` instance: ``__new__`` returns the
    scraped DataFrame directly. For that reason no ``__init__`` is defined (it would
    never be invoked).

    Returns
    -------
    pandas.DataFrame
        One row per country with a ``country`` column and ``score_<year>`` columns
        for 2010 through 2018. A score is missing where the table cell was blank
        or absent.

    Side effects
    ------------
    Writes the same data to ``gpi_2010-2018.csv`` in the current working directory.

    Raises
    ------
    requests.HTTPError
        If Wikipedia answers with an error status.
    requests.Timeout
        If the page is not received within 30 seconds.
    """

    def __new__(cls):
        # Imports are local because the notebook has no shared import cell to rely on.
        import re

        import pandas as pd
        import requests
        from bs4 import BeautifulSoup

        base_year = 2018        # latest year
        years = 9               # number of years to get data
        first_year = base_year - years + 1

        response = requests.get(url='https://en.wikipedia.org/wiki/Global_Peace_Index', timeout=30)
        # Fail loudly on 4xx/5xx instead of parsing an error page into an empty frame.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # (year, <td> index) pairs: the oldest year is read from cell 2*years and the
        # newest from cell 2, stepping two cells per year.
        # NOTE(review): presumably each year occupies a rank cell plus a score cell,
        # newest first - verify against the current page layout.
        year_columns = list(zip(range(first_year, base_year + 1), range(2 * years, 0, -2)))

        def get_countries_by_gpi():
            for table in soup.find_all('table', re.compile('wikitable sortable')):
                headers = table.find_all('th')
                if not headers or headers[0].get_text() != 'Country\n':
                    continue
                for tr in table.find_all('tr'):
                    links = tr.find_all('a')
                    if not links:
                        # Rows without any link carry no country name.
                        continue
                    country_name = links[0].get_text()
                    if country_name.startswith('['):
                        # First link is a footnote marker such as "[1]", not a country.
                        continue
                    cells = tr.find_all('td')
                    row = {'country': country_name}
                    for year, index in year_columns:
                        if index >= len(cells):
                            # Short row: leave this year's score missing.
                            continue
                        score = cells[index].get_text()
                        if score != '' and score != '\n':
                            row['score_%s' % year] = float(score)
                    yield row

        gpi = pd.DataFrame(list(get_countries_by_gpi()))
        gpi.to_csv('gpi_%s-%s.csv' % (first_year, base_year), index=False)
        return gpi

In [195]:
# Run the scraper: the returned DataFrame is shown as the cell output, and a copy is written to gpi_2010-2018.csv.
peace()

Unnamed: 0,country,score_2010,score_2011,score_2012,score_2013,score_2014,score_2015,score_2016,score_2017,score_2018
0,Iceland,1.212,1.148,1.113,1.162,1.189,1.148,1.192,1.137,1.096
1,New Zealand,1.188,1.279,1.239,1.237,1.236,1.221,1.287,1.241,1.192
2,Austria,1.290,1.337,1.328,1.250,1.200,1.198,1.278,1.265,1.274
3,Portugal,1.366,1.453,1.470,1.467,1.425,1.344,1.356,1.258,1.318
4,Denmark,1.341,1.289,1.239,1.207,1.193,1.150,1.246,1.337,1.353
5,Canada,1.392,1.355,1.317,1.306,1.306,1.287,1.388,1.371,1.372
6,Czech Republic,1.360,1.320,1.396,1.404,1.381,1.341,1.360,1.360,1.381
7,Singapore,1.624,1.585,1.521,1.438,1.545,1.490,1.535,1.534,1.382
8,Japan,1.247,1.287,1.326,1.293,1.316,1.323,1.395,1.408,1.391
9,Ireland,1.337,1.370,1.328,1.370,1.384,1.354,1.433,1.408,1.393


In [193]:
class UNODC_crime(object):
    """
    Load UNODC crime-victimisation survey data from a local Excel workbook.

    Calling ``UNODC_crime(sheet)`` does not create a ``UNODC_crime`` instance:
    ``__new__`` returns the parsed data directly, so no ``__init__`` is defined
    (it would never be invoked).

    Parameters
    ----------
    sheet : str or None, optional
        Name of the worksheet to load. The workbook includes
        'Car theft', 'Burglary', 'Robbery', 'Sexual Assault', 'Physical Assault', 'Bribery',
        'Police Reporting Car Theft', 'Police Reporting Burglary', 'Police Reporting Robbery',
        'Police Reporting Sexual Assault', 'Police Reporting Bribery'.
        Pass 'ALL' (or omit the argument) to load every sheet.
    file_path : str, optional
        Directory that contains the workbook.
    file_name : str, optional
        File name of the workbook.

    Returns
    -------
    pandas.DataFrame or dict
        A single DataFrame when one sheet is requested; a mapping of
        sheet name -> DataFrame when all sheets are requested.
    """

    def __new__(cls, sheet=None,
                file_path="590PR_final_datasets",
                file_name="data-and-metadata-from-crime-victimisation-surveys-2004-2014-xlsx-1.xlsx"):
        # Imports are local because the notebook has no shared import cell to rely on.
        import os

        import pandas as pd

        if sheet == "ALL":
            # pandas reads every worksheet when sheet_name is None.
            sheet = None

        # `sheet_name` replaces the deprecated `sheetname` keyword.
        # header=12 makes the 13th spreadsheet row the column header.
        # The result is also stored on the class (as the original did) so that
        # existing code reading `UNODC_crime.dfs` keeps working.
        cls.dfs = pd.read_excel(os.path.join(file_path, file_name), sheet_name=sheet, header=12)
        return cls.dfs

In [181]:
# Load only the 'Burglary' worksheet; the bare name on the last line displays the result.
burglary = UNODC_crime('Burglary')
burglary

  return func(*args, **kwargs)


Unnamed: 0,region_name,sub_region_name,country_name,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,Do data comply with this definition?,"If NO, please provide details"
0,Africa,Southern Africa,South Africa,,,,,,,4.5,5.4,,4.7,,NO,Definition does not explicity exclude theft fr...
1,Americas,Caribbean,Barbados,,,,,,5.0,,,,,,YES,(blank)
2,Americas,Caribbean,Jamaica,,,3.5,,,4.5,,,1.7,,,,
3,Americas,Caribbean,Trinidad and Tobago,,,,8.4,,,,,,,,,
4,Americas,Central America,Costa Rica,,,,,10.56,,9.05,,,,6.72,,
5,Americas,Central America,Mexico,,,,,,1.75,1.8,4.5,4.7,5.54,,SÍ,(blank)
6,Americas,Northern America,Canada,8.8,,,,,8.3,,,,,,,
7,Americas,Northern America,United States of America,,,,,1.93,2.0,1.94,1.9,2.1,1.93,1.67,,In last 6 months over successive interviews du...
8,Americas,South America,Bolivia (Plurinational State of),,,,,,,,6.3,,5.9,,NO,No se registran los intentos de robo en vivien...
9,Americas,South America,Chile,,,7.8,5.6,5.2,5.3,4.3,5.7,4.4,4.2,3.6,NO,Para la ENUSC la categoría corresponde a hogar...


In [182]:
# No sheet given: every worksheet is loaded and the result is a mapping of sheet name -> DataFrame.
UNODC_crime()

  return func(*args, **kwargs)


OrderedDict([('Car theft',    region_name            sub_region_name  \
              0       Africa            Southern Africa   
              1     Americas                  Caribbean   
              2     Americas                  Caribbean   
              3     Americas            Central America   
              4     Americas            Central America   
              5     Americas           Northern America   
              6     Americas           Northern America   
              7     Americas              South America   
              8     Americas              South America   
              9     Americas              South America   
              10    Americas              South America   
              11    Americas              South America   
              12    Americas              South America   
              13    Americas              South America   
              14        Asia               Eastern Asia   
              15        Asia         South-

In [None]:
# %pip install gdelt  # uncomment and run once if the gdelt package (gdeltPyR) is not installed in this kernel

In [174]:
class GDELT(object):
    """
    Query GDELT 2.0 events for a date range and return event IDs with their locations.

    The GDELT Project is the largest, most comprehensive, and highest resolution open database of human society ever created.
    Just the 2015 data alone records nearly three quarters of a trillion emotional snapshots and more than 1.5 billion location
    references, while its total archives span more than 215 years, making it one of the largest open-access spatio-temporal
    datasets in existence and pushing the boundaries of "big data" study of global human society. Its Global Knowledge Graph
    connects the world's people, organizations, locations, themes, counts, images and emotions into a single holistic network
    over the entire planet.

    This loader uses GDELT 2.0, which supports dates after February 2015, through
    the gdeltPyR package (https://github.com/linwoodc3/gdeltPyR, imported as ``gdelt``).

    Calling ``GDELT(start, end)`` does not create a ``GDELT`` instance: ``__new__``
    returns the query result directly, so no ``__init__`` is defined (it would
    never be invoked).

    Parameters
    ----------
    start : str
        First date of the range, formatted 'yyyy mm dd' (e.g. "2017 10 15").
    end : str
        Last date of the range, in the same format.

    Returns
    -------
    The 'GLOBALEVENTID' and 'ActionGeo_FullName' columns of the events search
    result: one row per event with its ID and the full name of the place where
    it occurred.
    """

    def __new__(cls, start, end):
        import gdelt

        gd = gdelt.gdelt(version=2)
        # NOTE(review): coverage=True and translation=False are forwarded to
        # gdeltPyR unchanged - consult its documentation for their exact effect.
        results = gd.Search([start, end], table='events', coverage=True, translation=False)
        return results[['GLOBALEVENTID', 'ActionGeo_FullName']]

In [175]:
# Fetch events between two dates; both bounds are 'yyyy mm dd' strings.
g = GDELT("2017 10 15", "2017 10 16")

In [176]:
# Render the event ID / location table returned by the GDELT query.
display(g)

Unnamed: 0,GLOBALEVENTID,ActionGeo_FullName
0,697952828,"California, United States"
1,697952829,"New York, United States"
2,697952830,"Michigan, United States"
3,697952831,"Montana, United States"
4,697952832,"Maryland, United States"
5,697952833,"Montana, United States"
6,697952834,"Montana, United States"
7,697952835,"California, United States"
8,697952836,"California, United States"
9,697952837,"Montana, United States"


In [131]:
class Happiness():
    """
    Load every CSV inside the World Happiness Report zip archive.

    Calling ``Happiness()`` does not create a ``Happiness`` instance: ``__new__``
    returns the loaded data directly, so no ``__init__`` is defined (it would
    never be invoked).

    Parameters
    ----------
    file_path : str, optional
        Directory that contains the archive.
    file_name : str, optional
        File name of the zip archive.

    Returns
    -------
    dict
        Mapping of archive member name (e.g. '2015.csv') -> pandas.DataFrame,
        in archive order.
    """

    def __new__(cls, file_path="590PR_final_datasets", file_name="world-happiness-report.zip"):
        # Imports are local because the notebook has no shared import cell to rely on.
        import os
        import zipfile

        import pandas as pd

        happy = {}
        # The context managers close the archive and each member stream.
        with zipfile.ZipFile(os.path.join(file_path, file_name)) as zf:
            for member in zf.infolist():
                if member.is_dir():
                    # Directory entries hold no data to parse.
                    continue
                with zf.open(member) as handle:
                    happy[member.filename] = pd.read_csv(handle)
        return happy

In [133]:
# Load the happiness archive and list which CSV members it contained.
ha = Happiness()
ha.keys()

dict_keys(['2015.csv', '2016.csv', '2017.csv'])

In [140]:
class Freedom():
    """
    Load the Human Freedom Index from its zipped CSV.

    Calling ``Freedom()`` does not create a ``Freedom`` instance: ``__new__``
    returns the DataFrame directly, so no ``__init__`` is defined (it would
    never be invoked).

    Parameters
    ----------
    file_path : str, optional
        Directory that contains the archive.
    file_name : str, optional
        File name of the zip archive. pandas requires the archive to hold
        exactly one data file.

    Returns
    -------
    pandas.DataFrame
        The contents of the CSV stored in the archive.
    """

    def __new__(cls, file_path="590PR_final_datasets", file_name="the-human-freedom-index.zip"):
        # Imports are local because the notebook has no shared import cell to rely on.
        import os

        import pandas as pd

        return pd.read_csv(os.path.join(file_path, file_name), compression='zip')





In [141]:
# Load and display the Human Freedom Index table.
Freedom()

Unnamed: 0,year,ISO_code,countries,region,pf_rol_procedural,pf_rol_civil,pf_rol_criminal,pf_rol,pf_ss_homicide,pf_ss_disappearances_disap,...,ef_regulation_business_bribes,ef_regulation_business_licensing,ef_regulation_business_compliance,ef_regulation_business,ef_regulation,ef_score,ef_rank,hf_score,hf_rank,hf_quartile
0,2016,ALB,Albania,Eastern Europe,6.661503,4.547244,4.666508,5.291752,8.920429,10.0,...,4.050196,7.324582,7.074366,6.705863,6.906901,7.54,34.0,7.568140,48.0,2.0
1,2016,DZA,Algeria,Middle East & North Africa,,,,3.819566,9.456254,10.0,...,3.765515,8.523503,7.029528,5.676956,5.268992,4.99,159.0,5.135886,155.0,4.0
2,2016,AGO,Angola,Sub-Saharan Africa,,,,3.451814,8.060260,5.0,...,1.945540,8.096776,6.782923,4.930271,5.518500,5.17,155.0,5.640662,142.0,4.0
3,2016,ARG,Argentina,Latin America & the Caribbean,7.098483,5.791960,4.343930,5.744791,7.622974,10.0,...,3.260044,5.253411,6.508295,5.535831,5.369019,4.84,160.0,6.469848,107.0,3.0
4,2016,ARM,Armenia,Caucasus & Central Asia,,,,5.003205,8.808750,10.0,...,4.575152,9.319612,6.491481,6.797530,7.378069,7.57,29.0,7.241402,57.0,2.0
5,2016,AUS,Australia,Oceania,8.439110,7.525648,7.364078,7.776279,9.623538,10.0,...,7.238900,8.944387,8.823021,8.121631,8.465526,7.98,10.0,8.582219,4.0,1.0
6,2016,AUT,Austria,Western Europe,8.969832,7.872188,7.673227,8.171749,9.737912,10.0,...,6.418890,7.296116,8.531578,7.419870,7.371334,7.58,27.0,8.413474,16.0,1.0
7,2016,AZE,Azerbaijan,Caucasus & Central Asia,,,,4.270861,9.143499,10.0,...,5.353533,6.959699,7.814181,7.080078,6.874880,6.49,106.0,6.083277,130.0,4.0
8,2016,BHS,Bahamas,Latin America & the Caribbean,6.930835,6.008696,6.262840,6.400790,0.000000,10.0,...,,7.981672,7.388227,7.523034,8.169125,7.34,49.0,7.397269,50.0,2.0
9,2016,BHR,Bahrain,Middle East & North Africa,,,,5.900339,9.790481,10.0,...,6.596745,8.074670,9.680534,8.018774,7.415786,7.56,30.0,6.848035,75.0,2.0


In [135]:
class Poverty():
    """
    Load every CSV inside the PovStats zip archive.

    Calling ``Poverty()`` does not create a ``Poverty`` instance: ``__new__``
    returns the loaded data directly, so no ``__init__`` is defined (it would
    never be invoked).

    Parameters
    ----------
    file_path : str, optional
        Directory that contains the archive.
    file_name : str, optional
        File name of the zip archive.

    Returns
    -------
    dict
        Mapping of archive member name (e.g. 'PovStatsData.csv') ->
        pandas.DataFrame, in archive order.
    """

    def __new__(cls, file_path="590PR_final_datasets", file_name="PovStats_csv.zip"):
        # Imports are local because the notebook has no shared import cell to rely on.
        import os
        import zipfile

        import pandas as pd

        pov = {}
        # The context managers close the archive and each member stream.
        with zipfile.ZipFile(os.path.join(file_path, file_name)) as zf:
            for member in zf.infolist():
                if member.is_dir():
                    # Directory entries hold no data to parse.
                    continue
                with zf.open(member) as handle:
                    pov[member.filename] = pd.read_csv(handle)
        return pov




In [137]:
# List the CSV members found in the PovStats archive.
Poverty().keys()

dict_keys(['PovStatsData.csv', 'PovStatsCountry.csv', 'PovStatsSeries.csv', 'PovStatsCountry-Series.csv', 'PovStatsFootNote.csv'])

In [138]:
class Suicide():
    """
    Load the suicide-rates overview (1985 to 2016) from its zipped CSV.

    Calling ``Suicide()`` does not create a ``Suicide`` instance: ``__new__``
    returns the DataFrame directly, so no ``__init__`` is defined (it would
    never be invoked).

    Parameters
    ----------
    file_path : str, optional
        Directory that contains the archive.
    file_name : str, optional
        File name of the zip archive. pandas requires the archive to hold
        exactly one data file.

    Returns
    -------
    pandas.DataFrame
        The contents of the CSV stored in the archive.
    """

    def __new__(cls, file_path="590PR_final_datasets", file_name="suicide-rates-overview-1985-to-2016.zip"):
        # Imports are local because the notebook has no shared import cell to rely on.
        import os

        import pandas as pd

        return pd.read_csv(os.path.join(file_path, file_name), compression='zip')







In [139]:
# Load and display the suicide-rates table.
Suicide()

Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers
5,Albania,1987,female,75+ years,1,35600,2.81,Albania1987,,2156624900,796,G.I. Generation
6,Albania,1987,female,35-54 years,6,278800,2.15,Albania1987,,2156624900,796,Silent
7,Albania,1987,female,25-34 years,4,257200,1.56,Albania1987,,2156624900,796,Boomers
8,Albania,1987,male,55-74 years,1,137500,0.73,Albania1987,,2156624900,796,G.I. Generation
9,Albania,1987,female,5-14 years,0,311000,0.00,Albania1987,,2156624900,796,Generation X


In [144]:
class Unemployment():
    """
    Load the unemployment dataset from its zipped CSV.

    Calling ``Unemployment()`` does not create an ``Unemployment`` instance:
    ``__new__`` returns the DataFrame directly, so no ``__init__`` is defined
    (it would never be invoked).

    Parameters
    ----------
    file_path : str, optional
        Directory that contains the archive.
    file_name : str, optional
        File name of the zip archive. pandas requires the archive to hold
        exactly one data file.

    Returns
    -------
    pandas.DataFrame
        The contents of the CSV stored in the archive.
    """

    def __new__(cls, file_path="590PR_final_datasets", file_name="unemployment.zip"):
        # Imports are local because the notebook has no shared import cell to rely on.
        import os

        import pandas as pd

        return pd.read_csv(os.path.join(file_path, file_name), compression='zip')





In [145]:
# Load and display the unemployment table.
Unemployment()

Unnamed: 0,OID,Country or Area,Year,Description,Magnitude,Value
0,91467R..ZF...,ALBANIA,2008,UNEMPLOYMENT RATE,PERCENT PER ANNU,12.75
1,91467C..ZF...,ALBANIA,2008,UNEMPLOYMENT (IN THOUSANDS),THOUSANDS,140599.00
2,91467D..ZF...,ALBANIA,2008,LABOR FORCE (IN THOUSANDS),THOUSANDS,1102890.00
3,91467E..ZF...,ALBANIA,2008,EMPLOYMENT(IN THOUSANDS),THOUSANDS,962293.00
4,91467R..ZF...,ALBANIA,2007,UNEMPLOYMENT RATE,PERCENT PER ANNU,13.36
5,91467C..ZF...,ALBANIA,2007,UNEMPLOYMENT (IN THOUSANDS),THOUSANDS,144458.00
6,91467D..ZF...,ALBANIA,2007,LABOR FORCE (IN THOUSANDS),THOUSANDS,1079570.00
7,91467E..ZF...,ALBANIA,2007,EMPLOYMENT(IN THOUSANDS),THOUSANDS,935116.00
8,91467R..ZF...,ALBANIA,2006,UNEMPLOYMENT RATE,PERCENT PER ANNU,13.90
9,91467C..ZF...,ALBANIA,2006,UNEMPLOYMENT (IN THOUSANDS),THOUSANDS,150332.00


In [151]:
def Hunger(file_path="590PR_final_datasets", file_name="Hunger.csv", first_year=2009, last_year=2018):
    """
    Load the hunger indicator file and keep only the name columns and a range of years.

    Parameters
    ----------
    file_path : str, optional
        Directory that contains the data file.
    file_name : str, optional
        Name of the tab-separated data file.
    first_year : int, optional
        First year column to keep (inclusive).
    last_year : int, optional
        Last year column to keep (inclusive).

    Returns
    -------
    pandas.DataFrame
        Columns 'Country Name', 'Indicator Name', then one column per year
        from ``first_year`` to ``last_year``, named by the year as a string.

    Raises
    ------
    KeyError
        If a requested column is not present in the file.
    """
    # Imports are local because the notebook has no shared import cell to rely on.
    import os

    import pandas as pd

    # Despite the .csv extension, the file is parsed as tab-separated.
    df_hunger = pd.read_csv(os.path.join(file_path, file_name), na_values='\t', sep='\t', header=0)

    year_columns = [str(year) for year in range(first_year, last_year + 1)]
    wanted = ['Country Name', 'Indicator Name'] + year_columns
    # Select all columns at once; .copy() hands the caller an independent frame.
    return df_hunger[wanted].copy()

In [152]:
# Load and display the undernourishment indicator for 2009 through 2018.
Hunger()

Unnamed: 0,Country Name,Indicator Name,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Aruba,Prevalence of undernourishment (% of population),,,,,,,,,,
1,Afghanistan,Prevalence of undernourishment (% of population),22.900000,22.100000,22.200000,23.400000,25.500000,27.600000,29.300000,30.300000,,
2,Angola,Prevalence of undernourishment (% of population),42.500000,40.400000,38.200000,35.100000,30.800000,27.000000,24.800000,23.900000,,
3,Albania,Prevalence of undernourishment (% of population),8.400000,7.400000,6.600000,6.000000,5.700000,5.500000,5.500000,5.500000,,
4,Andorra,Prevalence of undernourishment (% of population),,,,,,,,,,
5,Arab World,Prevalence of undernourishment (% of population),10.016621,9.676187,9.403567,11.083189,11.224646,11.459653,11.782652,12.051903,,
6,United Arab Emirates,Prevalence of undernourishment (% of population),6.000000,5.900000,5.600000,5.100000,4.500000,3.900000,3.100000,2.500000,,
7,Argentina,Prevalence of undernourishment (% of population),4.100000,4.000000,3.900000,3.700000,3.500000,3.400000,3.600000,3.800000,,
8,Armenia,Prevalence of undernourishment (% of population),5.300000,5.500000,5.600000,5.100000,4.600000,4.300000,4.300000,4.300000,,
9,American Samoa,Prevalence of undernourishment (% of population),,,,,,,,,,


In [282]:
class Trade():
    """
    Load the WITS trade summary archive into one combined DataFrame.

    WITS Trade Stats is a database created by aggregating data from UN COMTRADE and UNCTAD TRAINS database.
    It provides information on bilateral trade exports, imports and tariffs for over 180 countries and regions.

    Calling ``Trade()`` does not create a ``Trade`` instance: ``__new__`` returns
    the DataFrame directly, so no ``__init__`` is defined (it would never be
    invoked).

    Parameters
    ----------
    file_path : str, optional
        Directory that contains the archive.
    file_name : str, optional
        File name of the zip archive.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of every archive member that could be parsed as
        CSV, with a fresh 0..n-1 index.

    Side effects
    ------------
    Configures root logging to write DEBUG output to ``test.log`` if logging
    has not been configured yet. Each member name is logged at DEBUG level and
    each skipped member at WARNING level.

    Raises
    ------
    ValueError
        If no member of the archive could be parsed.
    """

    def __new__(cls, file_path="590PR_final_datasets",
                file_name="wits_en_trade_summary_allcountries_allyears.zip"):
        # Imports are local because the notebook has no shared import cell to rely on.
        import logging
        import os
        import zipfile

        import pandas as pd

        # No-op when the root logger already has handlers.
        logging.basicConfig(filename="test.log", level=logging.DEBUG)

        archive = os.path.join(file_path, file_name)
        frames = []
        # The context managers close the archive and each member stream.
        with zipfile.ZipFile(archive) as zf:
            for member in zf.infolist():
                logging.debug(member.filename)
                try:
                    with zf.open(member) as handle:
                        frames.append(pd.read_csv(handle, header=0))
                except Exception as exc:
                    # Loading stays best-effort, but the skipped member is
                    # recorded instead of being dropped silently.
                    logging.warning("Skipping %s: %s", member.filename, exc)

        if not frames:
            raise ValueError("No readable CSV members found in %s" % archive)
        return pd.concat(frames, axis=0, ignore_index=True)

In [284]:
# Load and display the combined WITS trade summary table.
Trade()

Unnamed: 0,Reporter,Partner,Product categories,Indicator Type,Indicator,2015,2014,2013,2012,2011,...,1997,1996,1995,1994,1993,1992,1991,1990,1989,1988
0,Afghanistan,...,...,Development,GDP (current US$ Mil),19702.99,20050.19,20046.33,20536.54,17930.24,...,,,,,,,,,,
1,Afghanistan,...,...,Export,No. Of Export partners,53.00,7.00,7.00,7.00,37.00,...,,,,,,,,,,
2,Afghanistan,...,...,Export,No. Of Export products,43.00,5.00,5.00,5.00,31.00,...,,,,,,,,,,
3,Afghanistan,...,...,Import,No. Of Import partners,85.00,8.00,8.00,8.00,72.00,...,,,,,,,,,,
4,Afghanistan,...,...,Import,No. Of Import products,67.00,15.00,15.00,15.00,64.00,...,,,,,,,,,,
5,Afghanistan,...,...,Tariff,No. Of Tariff Agreement,,,2.00,2.00,,...,,,,,,,,,,
6,Afghanistan,...,...,Development,Trade Balance (current US$ Mil),-8235.75,-7860.91,-8710.11,-6905.52,-6829.03,...,,,,,,,,,,
7,Afghanistan,China,All Products,Export,Trade (US$ Mil)-Top 5 Export Partner,,,20.28,4.80,,...,,,,,,,,,,
8,Afghanistan,China,All Products,Import,Trade (US$ Mil)-Top 5 Import Partner,1044.00,1038.20,136.25,713.66,577.18,...,,,,,,,,,,
9,Afghanistan,Germany,All Products,Export,Trade (US$ Mil)-Top 5 Export Partner,,17.05,,,,...,,,,,,,,,,
