### 1. Data Preparation

#### a. Import Data

In [8]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [9]:
# Read the country workbook into a DataFrame and preview its first rows.
country_data_path = '1. Country Data.xlsx'
df = pd.read_excel(country_data_path)
df.head()

Unnamed: 0,country,child_mort,exports,health,imports,income,inflation,life_expec,total_fer,gdpp,fertility_level,development_level,mortality_risk,inflation_risk,income_level
0,Afghanistan,90.2,10.0,7.58,44.9,1610,9.44,56.2,5.82,553,High,Underdeveloped,High,Moderate,Low Income
1,Albania,16.6,28.0,6.55,48.6,9930,4.49,76.3,1.65,4090,Low,Developing,Low,Low,Middle Income
2,Algeria,27.3,38.4,4.17,31.4,12900,16.1,76.5,2.89,4460,Medium,Developing,Medium,High,Middle Income
3,Angola,119.0,62.3,2.85,42.9,5900,22.4,60.1,6.16,3530,High,Underdeveloped,High,High,Middle Income
4,Antigua and Barbuda,10.3,45.5,6.03,58.9,19100,1.44,76.8,2.13,12200,Medium,Developed,Low,Low,High Income


#### b. Data Understanding

In [10]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167 entries, 0 to 166
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   country            167 non-null    object 
 1   child_mort         167 non-null    float64
 2   exports            167 non-null    float64
 3   health             167 non-null    float64
 4   imports            167 non-null    float64
 5   income             167 non-null    int64  
 6   inflation          167 non-null    float64
 7   life_expec         167 non-null    float64
 8   total_fer          167 non-null    float64
 9   gdpp               167 non-null    int64  
 10  fertility_level    167 non-null    object 
 11  development_level  167 non-null    object 
 12  mortality_risk     167 non-null    object 
 13  inflation_risk     159 non-null    object 
 14  income_level       167 non-null    object 
dtypes: float64(7), int64(2), object(6)
memory usage: 19.7+ KB


In [11]:
# Partition the column labels by dtype: numeric columns vs. all the others.
numbers = df.select_dtypes(include='number').columns
categories = df.select_dtypes(exclude='number').columns

In [12]:
# Summary statistics of the numeric columns, rounded to two decimals.
df[numbers].describe().round(2)

Unnamed: 0,child_mort,exports,health,imports,income,inflation,life_expec,total_fer,gdpp
count,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0
mean,38.27,41.11,6.82,46.89,17144.69,7.78,70.56,2.95,12964.16
std,40.33,27.41,2.75,24.21,19278.07,10.57,8.89,1.51,18328.7
min,2.6,0.11,1.81,0.07,609.0,-4.21,32.1,1.15,231.0
25%,8.25,23.8,4.92,30.2,3355.0,1.81,65.3,1.8,1330.0
50%,19.3,35.0,6.32,43.3,9960.0,5.39,73.1,2.41,4660.0
75%,62.1,51.35,8.6,58.75,22800.0,10.75,76.8,3.88,14050.0
max,208.0,200.0,17.9,174.0,125000.0,104.0,82.8,7.49,105000.0


In [13]:
# Summary (count / unique / top / freq) of the non-numeric columns.
df.loc[:, categories].describe()

Unnamed: 0,country,fertility_level,development_level,mortality_risk,inflation_risk,income_level
count,167,167,167,167,159,167
unique,167,3,3,3,3,3
top,Afghanistan,Low,Underdeveloped,Low,Low,High Income
freq,1,65,74,86,74,67


#### d. Feature Engineering

In [14]:
# 1. Fertility category: total_fer in (0, 2.1] -> Low, (2.1, 4] -> Medium,
#    (4, 10] -> High.
fertility_edges = [0, 2.1, 4, 10]
fertility_labels = ['Low', 'Medium', 'High']
df['fertility_level'] = pd.cut(df['total_fer'], bins=fertility_edges, labels=fertility_labels)

In [15]:
# 2. Development level: gdpp is split at 12000 (threshold between developing
#    and developed countries); the observed maximum closes the top bin.
gdpp_edges = [0, 12000, df['gdpp'].max()]
gdpp_labels = ['Developing', 'Developed']
df['development_level'] = pd.cut(df['gdpp'], bins=gdpp_edges, labels=gdpp_labels)


In [16]:
# 3. Mortality risk level: child_mort in (0, 20] -> Low, (20, 50] -> Medium,
#    (50, observed max] -> High.
mortality_edges = [0, 20, 50, df['child_mort'].max()]
mortality_labels = ['Low', 'Medium', 'High']
df['mortality_risk'] = pd.cut(df['child_mort'], bins=mortality_edges, labels=mortality_labels)

In [17]:
# 4. Inflation risk category: inflation <= 5 -> Low, (5, 10] -> Moderate,
#    > 10 -> High.
# pd.cut builds right-closed intervals, so a lower edge of 0 leaves every
# country with zero or negative inflation (deflation) outside all bins and
# labels it NaN. Open-ended outer edges put those rows in 'Low' and also
# remove the dependency on the observed maximum. Rows whose inflation value
# is itself missing still come out as NaN.
inflation_edges = [float('-inf'), 5, 10, float('inf')]
df['inflation_risk'] = pd.cut(
    df['inflation'],
    bins=inflation_edges,
    labels=['Low', 'Moderate', 'High']
)

In [18]:
# 5. Income level category: income in (0, 5000] -> Low Income,
#    (5000, 15000] -> Middle Income, (15000, observed max] -> High Income.
income_edges = [0, 5000, 15000, df['income'].max()]
income_labels = ['Low Income', 'Middle Income', 'High Income']
df['income_level'] = pd.cut(df['income'], bins=income_edges, labels=income_labels)

In [19]:
# Preview the first rows again to check the recomputed category columns.
df.head()

Unnamed: 0,country,child_mort,exports,health,imports,income,inflation,life_expec,total_fer,gdpp,fertility_level,development_level,mortality_risk,inflation_risk,income_level
0,Afghanistan,90.2,10.0,7.58,44.9,1610,9.44,56.2,5.82,553,High,Developing,High,Moderate,Low Income
1,Albania,16.6,28.0,6.55,48.6,9930,4.49,76.3,1.65,4090,Low,Developing,Low,Low,Middle Income
2,Algeria,27.3,38.4,4.17,31.4,12900,16.1,76.5,2.89,4460,Medium,Developing,Medium,High,Middle Income
3,Angola,119.0,62.3,2.85,42.9,5900,22.4,60.1,6.16,3530,High,Developing,High,High,Middle Income
4,Antigua and Barbuda,10.3,45.5,6.03,58.9,19100,1.44,76.8,2.13,12200,Medium,Developed,Low,Low,High Income


#### e. Save to Excel

In [20]:
# Write the frame back to Excel without the index column.
# NOTE(review): this is the same path the notebook reads from at the top, so
# each run overwrites its own input with the engineered columns. Consider
# saving to a separate output file to keep the raw data recoverable; confirm
# first whether anything downstream expects this exact filename.
df.to_excel('1. Country Data.xlsx', index=False)