# Go between ISO-3166 codes and names Tester


In [1]:
# WARNING CONTROL: decide whether Python warnings are displayed or ignored.
import warnings
import traceback

# Switch between 'default' (show warnings) and 'ignore' (suppress them).
warnings.simplefilter('ignore')

# Debug flag: True enables extended error messages; set it to False to turn
# debugging mode off.
debug = True


## Instantiate Classes

In [24]:
import os
import sys

# Make the project packages importable: take the absolute path of the parent
# directory and keep the part that precedes 'rezaware/' as the import root.
proj_dir = os.path.abspath(os.pardir)
sys.path.insert(1,proj_dir.split('rezaware/')[0])
from rezaware.modules.etl.transform import isoAlphaCodes as iso
from rezaware.modules.lib.spark import execSession as spark

# In debug mode, reload the ISO module so source edits are picked up without
# restarting the kernel. The result is bound back to `iso` (the original bound
# it to an unused name) because reload() returns the module object to use.
if debug:
    import importlib
    iso = importlib.reload(iso)

# NOTE(review): this description text looks copied from a database notebook;
# confirm whether it should describe the ISO code conversion instead.
__desc__ = "read and write files from and to postgresql database"
clsSpark = spark.Spawn(
    desc=__desc__,
)
# Stop the session held by the Spawn instance, if there is one. The original
# referenced `.stop` without calling it, so the session was never stopped.
if clsSpark.session:
    clsSpark._session.stop()
clsISO = iso.dataWorkLoads(desc=__desc__)
print("\n%s class initialization and load complete!" % __desc__)

All functional ISOALPHACODES-libraries in TRANSFORM-package of ETL-module imported successfully!
execSession Class initialization complete
execSession Class initialization complete
isoAlphaCodes Class initialization complete

read and write files from and to postgresql database class initialization and load complete!


## Read Sample dataset

In [3]:
import pandas as pd

# Build the CSV location from the directory that precedes 'rezaware/' in the
# project path computed in the class instantiation cell.
fpath = os.path.join(
    proj_dir.split('rezaware/')[0],
    "mining/data/property/hotel_groups.csv",
)

# header=1: the column names are taken from the second row of the file.
grps_df = pd.read_csv(fpath, header=1)

# Display only the company and country columns of the loaded data.
grps_df.loc[:, ['COMPANY 2','COUNTRY']]

Unnamed: 0,COMPANY 2,COUNTRY
0,Skyna Luanda,Angola
1,Sakkara Inn,Egypt
2,Sea Gull Hurghada,Egypt
3,Grand Oasis,Egypt
4,Tranquility Bay,Antigua and Barbuda
5,Accra,Barbados
6,Sunshine Suites,Cayman Islands
7,East Winds Inn,Saint Lucia
8,The Buccaneer,"Virgin Islands, U.S."
9,Posada Viena,Mexico


## Construct ISO 3166 codes from name values

In [25]:
from pyspark.sql import functions as F

# Transformation spec passed to get_iso_3166: for the COUNTRY entry, ATTRNAME
# names the source column and TRANSFORM names the output column for each code.
_transform_dict = dict(
    COUNTRY = dict(
        ATTRNAME = 'COUNTRY',
        TRANSFORM = dict(
            ALPHA2 = 'country_3166_alpha2',
            ALPHA3 = 'country_3166_alpha3',
            UN = 'country_un_code',
        ),
    ),
# TBD - when we have subdiv data
#     "SUBDIV" : {
#         "ATTRNAME"  : 'PROVINCE',
#         "ALPHA2": 'country_3166_alpha2',
#         "TRANSFORM" : {
#             "TYPE" : 'subdiv_type',
#             'CODE' : 'subdiv_3166_code',
#         }
#     }
)

# Run the transformation on the pandas dataframe loaded from the sample CSV.
_iso_data_sdf = clsISO.get_iso_3166(data=grps_df, t_iso=_transform_dict)

# Preview the source columns next to the newly added alpha-2 / alpha-3 columns.
_iso_preview_cols = [
    'COMPANY 2', 'CITY', 'COUNTRY',
    'ISO 3166-1', 'country_3166_alpha2', 'country_3166_alpha3',
]
_iso_data_sdf.select(*[F.col(_name) for _name in _iso_preview_cols]) \
             .show(n=5, truncate=False)

+-----------------+---------------+-------------------+----------+-------------------+-------------------+
|COMPANY 2        |CITY           |COUNTRY            |ISO 3166-1|country_3661_alpha2|country_3661_alpha3|
+-----------------+---------------+-------------------+----------+-------------------+-------------------+
|Skyna Luanda     |Luanda         |Angola             |AGO       |AO                 |AGO                |
|Sakkara Inn      |Giza           |Egypt              |EGY       |EG                 |EGY                |
|Sea Gull Hurghada|Hurghada       |Egypt              |EGY       |EG                 |EGY                |
|Grand Oasis      |Sharm El Sheikh|Egypt              |EGY       |EG                 |EGY                |
|Tranquility Bay  |Bolans         |Antigua and Barbuda|ATG       |AG                 |ATG                |
+-----------------+---------------+-------------------+----------+-------------------+-------------------+
only showing top 5 rows



## Reverse ISO codes to names

In [26]:
from pyspark.sql import functions as F

# Reverse-lookup spec passed to reverse_iso_3166: ALPHA3 names the column
# produced by the previous step, and TRANSFORM names the output column for
# each requested attribute.
_reverse_dict = dict(
    COUNTRY = dict(
        ALPHA3 = 'country_3166_alpha3',
        TRANSFORM = dict(
            NAME = 'country_name',
            OFFICIAL = 'official_name',
            NUMERIC = 'the_un_code',
        ),
    ),
)

# Run the reverse lookup on the dataframe returned by get_iso_3166.
_rev_data_sdf = clsISO.reverse_iso_3166(data=_iso_data_sdf, t_iso=_reverse_dict)

# Preview the original country and code columns next to the recovered names.
_rev_preview_cols = [
    'COUNTRY', 'country_3166_alpha3',
    'country_name', 'country_un_code', 'official_name',
]
_rev_data_sdf.select(*[F.col(_name) for _name in _rev_preview_cols]) \
             .show(n=5, truncate=False)

+-------------------+-------------------+-------------------+---------------+----------------------+
|COUNTRY            |country_3661_alpha3|country_name       |country_un_code|official_name         |
+-------------------+-------------------+-------------------+---------------+----------------------+
|Angola             |AGO                |Angola             |024            |Republic of Angola    |
|Egypt              |EGY                |Egypt              |818            |Arab Republic of Egypt|
|Egypt              |EGY                |Egypt              |818            |Arab Republic of Egypt|
|Egypt              |EGY                |Egypt              |818            |Arab Republic of Egypt|
|Antigua and Barbuda|ATG                |Antigua and Barbuda|028            |null                  |
+-------------------+-------------------+-------------------+---------------+----------------------+
only showing top 5 rows



In [21]:
import pycountry

# Look up a single country record from a lower-case country name.
# Alternatives that were tried here:
#   pycountry.countries.search_fuzzy(country)[0]
#   pycountry.countries.get(alpha_3='VIR')
country = 'angola'
pycountry.countries.lookup(country)

Country(alpha_2='AO', alpha_3='AGO', flag='🇦🇴', name='Angola', numeric='024', official_name='Republic of Angola')

In [98]:
# Find the US subdivision whose name matches `state`, ignoring letter case.
# Relies on `pycountry` having been imported by the preceding cell.
state = 'montana'
subdivs = pycountry.subdivisions.get(country_code='US')
div_vals = list(
    filter(lambda subdiv: subdiv.name.lower() == state.lower(), subdivs)
)
print(div_vals)

[Subdivision(code='US-MT', country_code='US', name='Montana', parent_code=None, type='State')]
