# KLIMATA ILOILO DATA EXTRACTION AND PREPROCESSING STAGE

### As we continue to preprocess data, we extract and process the Relative Wealth Index (RWI) dataset. RWI is a measurement of economic status per barangay.

### Importing essential libraries

In [1]:
import pandas as pd
import numpy as np

### Importing CSV file for Relative Wealth Data (RWI)

In [2]:
# Load the raw Relative Wealth Index table; the path is resolved relative to
# the notebook's working directory.
rwi_df = pd.read_csv(filepath_or_buffer="tm_relative_wealth_index.csv")

In [4]:
# Preview the first five rows to confirm the file parsed as expected.
rwi_df.head(n=5)

Unnamed: 0,uuid,adm4_pcode,date,freq,rwi_max,rwi_mean,rwi_median,rwi_min,rwi_std
0,TMRWI000000,PH015518001,2016-01-01,Y,0.729052,0.6686,0.670609,0.63657,0.03064
1,TMRWI000001,PH112402081,2016-01-01,Y,0.65761,0.531397,0.530826,0.453885,0.035633
2,TMRWI000002,PH063022043,2016-01-01,Y,0.623478,0.605305,0.611521,0.580916,0.021951
3,TMRWI000003,PH112402082,2016-01-01,Y,0.590676,0.48769,0.484263,0.413577,0.038916
4,TMRWI000004,PH097332060,2016-01-01,Y,0.462071,0.374796,0.366564,0.321981,0.033861


In [14]:
# Count the missing values in each column (summing the null mask down the rows).
rwi_df.isnull().sum(axis=0)

uuid          0
adm4_pcode    0
date          0
rwi_max       0
rwi_mean      0
rwi_median    0
rwi_min       0
rwi_std       0
dtype: int64

### Checking of RWI dataframe rows and columns

In [16]:
# (number of rows, number of columns) of the RWI dataframe.
rwi_df.shape

(6153, 8)

### Cleans up all column names for consistency

In [8]:
# Normalise the column labels step by step: trim surrounding whitespace,
# lowercase everything, then turn inner spaces into underscores.
trimmed_labels = rwi_df.columns.str.strip()
lowercase_labels = trimmed_labels.str.lower()
rwi_df.columns = lowercase_labels.str.replace(' ', '_')

### Replacing missing values in each numeric column with that column's median

In [9]:
# Work on the float64/int64 columns only; text and date columns are left alone.
numeric_frame = rwi_df.select_dtypes(include=['float64', 'int64'])
num_cols = numeric_frame.columns

# Each column's gaps are filled with that same column's median
# (fillna aligns the per-column medians by column label).
column_medians = numeric_frame.median()
rwi_df[num_cols] = numeric_frame.fillna(column_medians)

### Converting column 'date' into proper datetime format

In [11]:
# Parse the 'date' column into datetimes when the column is present.
# NOTE(review): errors='coerce' turns any unparseable value into NaT without
# raising, and nothing in this notebook re-checks for nulls afterwards.
has_date_column = 'date' in rwi_df.columns
if has_date_column:
    parsed_dates = pd.to_datetime(rwi_df['date'], errors='coerce')
    rwi_df['date'] = parsed_dates

### Cleans up all text (categorical) columns in the RWI dataframe for consistency

In [12]:
# Normalise every object-dtype (text) column: trim whitespace, then lowercase.
# This also lowercases identifier columns such as 'uuid' and 'adm4_pcode',
# which is why they appear in lowercase in the merged preview further down.
cat_cols = rwi_df.select_dtypes(include=['object']).columns
for column_name in cat_cols:
    trimmed_text = rwi_df[column_name].str.strip()
    rwi_df[column_name] = trimmed_text.str.lower()

### Dropping the irrelevant `freq` column

In [13]:
# Remove the 'freq' column, which is not used in the rest of this notebook.
# errors='ignore' makes the cell safe to re-run: once 'freq' is already gone,
# a plain drop would raise KeyError and interrupt the notebook.
rwi_df = rwi_df.drop(columns=['freq'], errors='ignore')

### After preprocessing the RWI dataset, we merge it with the locations dataset to match each barangay to its Relative Wealth Index.

### Importing CSV file for Location Data

In [17]:
# Load the barangay location lookup table from the working directory.
location_df = pd.read_csv(filepath_or_buffer="LOCATION.csv")

### Merging RWI dataframe and Locations dataframe

In [18]:
# Attach location attributes to each RWI row by barangay code. The inner join
# keeps only rows whose 'adm4_pcode' is present in both tables.
RWI_ILOILO = rwi_df.merge(location_df, on="adm4_pcode", how='inner')

### Viewing the merged dataset

In [19]:
# Preview the first five merged rows to check the join result.
RWI_ILOILO.head(n=5)

Unnamed: 0,uuid,adm4_pcode,date,rwi_max,rwi_mean,rwi_median,rwi_min,rwi_std,unnamed:_0,adm1_en,adm1_pcode,adm2_en,adm2_pcode,adm3_en,adm3_pcode,adm4_en,brgy_total_area
0,tmrwi000000,ph015518001,2016-01-01,0.729052,0.6686,0.670609,0.63657,0.03064,735,region i,ph010000000,pangasinan,ph015500000,dagupan city,ph015518000,bacayao norte,0.6558
1,tmrwi000001,ph112402081,2016-01-01,0.65761,0.531397,0.530826,0.453885,0.035633,119,region xi,ph110000000,davao del sur,ph112400000,davao city,ph112402000,mulig,9.3236
2,tmrwi000002,ph063022043,2016-01-01,0.623478,0.605305,0.611521,0.580916,0.021951,35,region vi,ph060000000,iloilo,ph063000000,iloilo city,ph063022000,mabolo-delgado,0.0524
3,tmrwi000003,ph112402082,2016-01-01,0.590676,0.48769,0.484263,0.413577,0.038916,707,region xi,ph110000000,davao del sur,ph112400000,davao city,ph112402000,new carmen,11.1305
4,tmrwi000004,ph097332060,2016-01-01,0.462071,0.374796,0.366564,0.321981,0.033861,76,region ix,ph090000000,zamboanga del sur,ph097300000,zamboanga city,ph097332000,patalon,16.0204


### Only selecting rows with Iloilo City as city name

In [20]:
# Keep only the rows whose city name is exactly 'iloilo city' (lowercase).
is_iloilo_city = RWI_ILOILO['adm3_en'] == 'iloilo city'
RWI_ILOILO = RWI_ILOILO.loc[is_iloilo_city]

### Dropping Irrelevant columns

In [21]:
# Discard the higher-level administrative columns (region, province, city)
# together with 'unnamed:_0', which shows up in the merged preview as a column
# coming from the location table (presumably a saved row index — TODO confirm).
# errors='ignore' skips any listed column that is already absent, so the cell
# does not raise KeyError when it is re-run or when LOCATION.csv is regenerated
# without that extra column.
RWI_ILOILO = RWI_ILOILO.drop(
    columns=[
        'unnamed:_0',
        'adm1_en',
        'adm1_pcode',
        'adm2_en',
        'adm2_pcode',
        'adm3_en',
        'adm3_pcode',
    ],
    errors='ignore',
)

In [26]:
# (number of rows, number of columns) of the Iloilo City RWI dataframe.
RWI_ILOILO.shape

(1260, 10)

### Exporting the final RWI dataframe with its respective locations as a CSV file

In [27]:
# Write the final table to disk; index=False leaves the row index out of the
# file, so no extra index column is written.
RWI_ILOILO.to_csv(path_or_buf="RW_INDEX.csv", index=False)